From 950a7a9c7f730d67ad6086e703a8a397eeb5ec2f Mon Sep 17 00:00:00 2001
From: jce <jce@unknown>
Date: Fri, 12 Jul 2002 22:07:40 +0000
Subject: [PATCH] cleaned up some of the job_step stuff.

---
 src/common/slurm_protocol_defs.h |   6 +-
 src/common/slurm_protocol_pack.c |   8 ++-
 src/slurmctld/controller.c       |  25 ++++----
 src/slurmctld/slurmctld.h        |  12 +---
 src/slurmctld/step_mgr.c         | 102 +++++++++++++++++--------------
 5 files changed, 82 insertions(+), 71 deletions(-)

diff --git a/src/common/slurm_protocol_defs.h b/src/common/slurm_protocol_defs.h
index 2c1c4d3184a..9550dc19ff1 100644
--- a/src/common/slurm_protocol_defs.h
+++ b/src/common/slurm_protocol_defs.h
@@ -220,12 +220,16 @@ typedef struct slurm_node_registration_status_msg
 
 typedef struct job_step_create_request_msg 
 {
+	uint32_t job_id;
+	uint32_t user_id;
 	uint32_t node_count;
-	uint32_t proc_count;
+	uint32_t cpu_count;
 	uint16_t relative;
 	char* node_list;
 } job_step_create_request_msg_t; 
 
+typedef struct job_step_create_request_msg job_step_specs_t;
+
 typedef struct job_step_create_response_msg 
 {
 	uint32_t job_step_id;
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index 7286835b9d5..b86bb119908 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -540,8 +540,10 @@ void pack_job_step_create_request_msg ( job_step_create_request_msg_t* msg , voi
 {
 	assert ( msg != NULL );
 
+	pack32 ( msg -> job_id, ( void ** ) buffer , length ) ;
+	pack32 ( msg -> user_id, ( void ** ) buffer , length ) ;
 	pack32 ( msg -> node_count, ( void ** ) buffer , length ) ;
-	pack32 ( msg -> proc_count, ( void ** ) buffer , length ) ;
+	pack32 ( msg -> cpu_count, ( void ** ) buffer , length ) ;
 	pack16 ( msg -> relative, ( void ** ) buffer , length ) ;
 	packstr ( msg -> node_list, ( void ** ) buffer , length ) ;
 }
@@ -555,8 +557,10 @@ int unpack_job_step_create_request_msg ( job_step_create_request_msg_t** msg , v
 	if (tmp_ptr == NULL) 
 		return ENOMEM;
 
+	unpack32 ( &( tmp_ptr -> job_id), ( void ** ) buffer , length ) ;
+	unpack32 ( &( tmp_ptr -> user_id), ( void ** ) buffer , length ) ;
 	unpack32 ( &( tmp_ptr -> node_count), ( void ** ) buffer , length ) ;
-	unpack32 ( &( tmp_ptr -> proc_count), ( void ** ) buffer , length ) ;
+	unpack32 ( &( tmp_ptr -> cpu_count), ( void ** ) buffer , length ) ;
 	unpack16 ( &( tmp_ptr -> relative), ( void ** ) buffer , length ) ;
 	unpackstr_xmalloc ( &( tmp_ptr -> node_list ), &uint16_tmp,  ( void ** ) buffer , length ) ;
 
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index d1ba83f50fb..86f7b9fc164 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -669,38 +669,39 @@ void
 slurm_rpc_job_step_create( slurm_msg_t* msg )
 {
 	/* init */
-	int error_code=0;
+	int error_code;
 	clock_t start_time;
 
 	slurm_msg_t resp;
+	struct step_record* step_rec;
 	job_step_create_response_msg_t job_step_resp;
 	job_step_create_request_msg_t * req_step_msg = 
 			( job_step_create_request_msg_t* ) msg-> data ;
 
 	start_time = clock ();
 
-	/* do RPC call */
-/*	error_code = job_step_cancel (  job_step_id_msg->job_id , 
-					job_step_id_msg->job_step_id);
-*/	/* return result */
-	if (error_code)
+	error_code = step_create ( req_step_msg, &step_rec );
+
+	/* return result */
+	if ( step_rec == NULL )
 	{
-		info ("job_step_create error %d  time=%ld", error_code, 
+		info ("job_step_create error %s  time=%ld", slurm_strerror( error_code ), 
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
+		/* FIXME Needs to be fixed to really work with a credential */
 		slurm_job_credential_t cred = { 1,1,"test",start_time,0} ;
 		info ("job_step_create success time=%ld",
 				(long) (clock () - start_time));
-	
-		job_step_resp.job_step_id = 23;
-    	job_step_resp.node_list = cred.node_list;
-    	job_step_resp.credentials = &cred;
+		
+		job_step_resp.job_step_id = step_rec->step_id;
+		bitmap2node_name( step_rec->node_bitmap, &(job_step_resp.node_list) );
+		job_step_resp.credentials = &cred;
+				
 #ifdef HAVE_LIBELAN3
 	/* FIXME */
-    	resp.qsw_job;     /* Elan3 switch context, opaque data structure */
 #endif
 		resp. address = msg -> address ;
 		resp. msg_type = RESPONSE_JOB_STEP_CREATE ;
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index af0248af866..9c1674b3eb4 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -199,15 +199,7 @@ struct 	step_record {
 #endif
 };
 
-struct step_specs {
-	uint32_t job_id;		/* job ID */
-	uint32_t step_id;		/* step number */
-	uint32_t user_id;		/* user the job runs as */
-	uint32_t min_nodes;		/* count of required nodes */
-	uint32_t min_cpus;		/* count of required processors */
-	char *node_list;		/* list of required nodes */
-	char *relative_node_list;	/* relative positions of required nodes */
-};
+typedef struct job_step_create_request_msg step_specs; 
 
 extern List job_list;			/* list of job_record entries */
 
@@ -690,7 +682,7 @@ extern int slurm_parser (char *spec, ...);
  * output: returns 0 on success, EINVAL if specification is invalid
  * NOTE: the calling program must xfree the memory pointed to by new_job_id
  */
-extern int step_create (struct step_specs *step_specs);
+extern int step_create ( step_specs *step_specs, struct step_record** );
 
 /* step_lock - lock the step information 
  * global: step_mutex - semaphore for the step table
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index e005240451e..5e0a147f2d6 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -40,8 +40,7 @@
 
 #define BUF_SIZE 1024
 
-bitstr_t * pick_step_nodes (struct job_record  *job_ptr, int min_nodes, int min_cpus, 
-		 char *node_list, char *relative_node_list);
+bitstr_t * pick_step_nodes (struct job_record  *job_ptr, step_specs *step_spec );
 
 /* 
  * create_step_record - create an empty step_record for the specified job.
@@ -249,12 +248,12 @@ pack_step (struct step_record *dump_step_ptr, void **buf_ptr, int *buf_len)
  * pick_step_nodes - select nodes for a job step that satify its requirements
  *	we satify the super-set of constraints.
  * global: node_record_table_ptr - pointer to global node table
- * NOTE: returns all of a job's nodes if min_nodes == INFINITE
+ * NOTE: returns all of a job's nodes if step_spec->node_count == INFINITE
  * NOTE: returned bitmap must be freed by the caller using bit_free()
  */
 bitstr_t *
-pick_step_nodes (struct job_record  *job_ptr, int min_nodes, int min_cpus, 
-		 char *node_list, char *relative_node_list) {
+pick_step_nodes (struct job_record  *job_ptr, step_specs *step_spec ) {
+
 	bitstr_t *nodes_avail = NULL, *nodes_picked = NULL, *node_tmp = NULL;
 	int error_code, nodes_picked_cnt = 0, cpus_picked_cnt, i;
 
@@ -263,41 +262,43 @@ pick_step_nodes (struct job_record  *job_ptr, int min_nodes, int min_cpus,
 	
 	nodes_avail = bit_copy(job_ptr->node_bitmap);
 
-	if (min_nodes == INFINITE)	/* return all available nodes */
+	if ( step_spec->node_count == INFINITE)	/* return all available nodes */
 		return nodes_avail;
 
-	if (node_list) {
-		error_code = node_name2bitmap (node_list, &nodes_picked);
-		if (error_code) {
-			info ("pick_step_nodes: invalid node list %s", node_list);
-			goto cleanup;
-		}
-		if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) {
-			info ("pick_step_nodes: requested nodes %s not part of job %u",
-				node_list, job_ptr->job_id);
-			goto cleanup;
+	if (step_spec->node_list) {
+		if ( step_spec->relative ) {
+			/* FIXME need to resolve format of relative_node_list */
+			info ("pick_step_nodes: relative_node_list not yet supported");
+
+		} 
+		else {
+			error_code = node_name2bitmap (step_spec->node_list, &nodes_picked);
+			if (error_code) {
+				info ("pick_step_nodes: invalid node list %s", step_spec->node_list);
+				goto cleanup;
+			}
+			if (bit_super_set (nodes_picked, job_ptr->node_bitmap) == 0) {
+				info ("pick_step_nodes: requested nodes %s not part of job %u",
+					step_spec->node_list, job_ptr->job_id);
+				goto cleanup;
+			}
 		}
 	}
 	else
 		nodes_picked = bit_alloc (bit_size (nodes_avail) );
 
-	if (relative_node_list) {
-/* need to resolve format of relative_node_list */
-		info ("pick_step_nodes: relative_node_list not yet supported");
-	}
-
 	/* if user specifies step needs a specific processor count and all nodes */
 	/* have the same processor count, just translate this to a node count */
-	if (min_cpus && (job_ptr->num_cpu_groups == 1)) {
-		i = (min_cpus + (job_ptr->cpus_per_node[0] - 1) ) / job_ptr->cpus_per_node[0];
-		min_nodes = (i > min_nodes) ? i : min_nodes;
-		min_cpus = 0;
+	if (step_spec->cpu_count && (job_ptr->num_cpu_groups == 1)) {
+		i = (step_spec->cpu_count + (job_ptr->cpus_per_node[0] - 1) ) / job_ptr->cpus_per_node[0];
+		step_spec->node_count = (i > step_spec->node_count) ? i : step_spec->node_count ;
+		step_spec->cpu_count = 0;
 	}
 
-	if (min_nodes) {
+	if (step_spec->node_count) {
 		nodes_picked_cnt = bit_set_count(nodes_picked);
-		if (min_nodes > nodes_picked_cnt) {
-			node_tmp = bit_pick_cnt(nodes_avail, (min_nodes - nodes_picked_cnt));
+		if (step_spec->node_count > nodes_picked_cnt) {
+			node_tmp = bit_pick_cnt(nodes_avail, (step_spec->node_count - nodes_picked_cnt));
 			if (node_tmp == NULL)
 				goto cleanup;
 			bit_or  (nodes_picked, node_tmp);
@@ -305,13 +306,13 @@ pick_step_nodes (struct job_record  *job_ptr, int min_nodes, int min_cpus,
 			bit_and (nodes_avail, node_tmp);
 			bit_free (node_tmp);
 			node_tmp = NULL;
-			nodes_picked_cnt = min_nodes;
+			nodes_picked_cnt = step_spec->node_count;
 		}
 	}
 
-	if (min_cpus) {
+	if (step_spec->cpu_count) {
 		cpus_picked_cnt = count_cpus(nodes_picked);
-		if (min_cpus > cpus_picked_cnt) {
+		if (step_spec->cpu_count > cpus_picked_cnt) {
 			int first_bit, last_bit;
 			first_bit = bit_ffs(nodes_avail);
 			last_bit  = bit_fls(nodes_avail);
@@ -320,10 +321,10 @@ pick_step_nodes (struct job_record  *job_ptr, int min_nodes, int min_cpus,
 					continue;
 				bit_set (nodes_picked, i);
 				cpus_picked_cnt += node_record_table_ptr[i].cpus;
-				if (cpus_picked_cnt >= min_cpus)
+				if (cpus_picked_cnt >= step_spec->cpu_count)
 					break;
 			}
-			if (min_cpus > cpus_picked_cnt)
+			if (step_spec->cpu_count > cpus_picked_cnt)
 				goto cleanup;
 		}
 	}
@@ -342,14 +343,16 @@ cleanup:
 
 
 /*
- * step_create - parse the suppied job step specification and create step_records for it
+ * step_create - creates a step_record for the job step_specs->job_id and
+ *	sets it up according to the step_specs.
  * input: step_specs - job step specifications
- * output: returns 0 on success, EINVAL if specification is invalid
- * globals: step_list - pointer to global job step list 
- * NOTE: the calling program must xfree the memory pointed to by new_job_id
+ * output: SUCCESS: returns SLURM_SUCCESS and stores the step_record pointer in
+ * 		*new_step_record; FAILURE: returns an ESLURM_* error code
+ * NOTE: don't free the returned step_record because that is managed through
+ * 	the job.
  */
 int
-step_create (struct step_specs *step_specs)
+step_create ( step_specs *step_specs, struct step_record** new_step_record  )
 {
 	struct step_record *step_ptr;
 	struct job_record  *job_ptr;
@@ -360,21 +363,26 @@ step_create (struct step_specs *step_specs)
 #endif
 
 	job_ptr = find_job_record (step_specs->job_id);
-	if (job_ptr == NULL)
-		return ESLURM_INVALID_JOB_ID;
+	if (job_ptr == NULL) 
+		return ESLURM_INVALID_JOB_ID ;
+
 	if (step_specs->user_id != job_ptr->user_id &&
-	    step_specs->user_id != 0)
-		return ESLURM_ACCESS_DENIED;
+	    	step_specs->user_id != 0) 
+		return ESLURM_ACCESS_DENIED ;
+
+	nodeset = pick_step_nodes (job_ptr, step_specs );
 
-	nodeset = pick_step_nodes (job_ptr, step_specs->min_nodes, step_specs->min_cpus, 
-		step_specs->node_list, step_specs->relative_node_list);
 	if (nodeset == NULL)
-		return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE;
+		return ESLURM_REQUESTED_NODE_CONFIG_UNAVAILABLE ;
 
+	/* FIXME need to set the error codes and define them 
+	 * probably shouldn't exit w/ a fatal... 
+	 */
 	step_ptr = create_step_record (job_ptr);
 	if (step_ptr == NULL)
 		fatal ("create_step_record failed with no memory");
 
+	/* set the step_record values */
 	step_ptr->step_id = (job_ptr->next_step_id)++;
 	step_ptr->node_bitmap = nodeset;
 
@@ -396,5 +404,7 @@ step_create (struct step_specs *step_specs)
 		fatal ("step_create: qsw_setup_jobinfo error");
 	bit_free (nodeset);
 #endif
-	return 0;
+
+	*new_step_record = step_ptr;
+	return SLURM_SUCCESS;
 }
-- 
GitLab