From c9dd5f770d7beb38a21e0fbca90662ac6db32b00 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Fri, 5 Jul 2002 19:05:12 +0000
Subject: [PATCH] Fix bug in unpack array code. Add cpu count by node for
 resource allocation

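Resource allocation responses now report each node's CPU count in a
compressed form: num_cpu_groups gives the element count of the
cpus_per_node and cpu_count_reps arrays. For example, if a job is
allocated nodes lx[0-7] with CPU counts 4,4,4,4,8,8,4,4, then
num_cpu_groups=3, cpus_per_node={4,8,4} and cpu_count_reps={4,2,2}.
job_allocate() now returns pointers into the job_record (node list and
the two CPU arrays) rather than xmalloc'd copies, so the RPC handlers
no longer xfree the node list.

For illustration only, a minimal sketch of how a client might expand
the new fields (print_cpu_groups is a hypothetical helper, not part of
this patch; report_results() in allocate-tst.c prints the same
information):

    #include <stdio.h>
    #include <src/api/slurm.h>

    /* print the allocation as, e.g., "4(x4), 8(x2), 4(x2)" */
    static void print_cpu_groups (resource_allocation_response_msg_t *resp)
    {
            int i;
            for (i = 0; i < resp->num_cpu_groups; i++)
                    printf ("%s%u(x%u)", (i ? ", " : ""),
                            resp->cpus_per_node[i], resp->cpu_count_reps[i]);
            printf ("\n");
    }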
---
 src/slurmctld/controller.c                    | 82 ++++++++++---------
 src/slurmctld/job_mgr.c                       | 56 ++++++++-----
 src/slurmctld/node_mgr.c                      | 51 ------------
 src/slurmctld/node_scheduler.c                | 64 ++++++++++++++-
 src/slurmctld/slurmctld.h                     | 79 ++++++++----------
 .../slurm_unit/api/manual/allocate-tst.c      | 51 ++++++++----
 6 files changed, 205 insertions(+), 178 deletions(-)

diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index 0dba83311c1..08c9456d66b 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -194,7 +194,7 @@ slurmctld_req ( slurm_msg_t * msg )
 			slurm_rpc_update_partition ( msg ) ;
 			break;
 		default:
-			error ("slurmctld_req: invalid request msg type %d\n", msg-> msg_type);
+			error ("invalid request msg type %d\n", msg-> msg_type);
 			slurm_send_rc_msg ( msg , EINVAL );
 			break;
 	}
@@ -216,7 +216,7 @@ slurm_rpc_dump_build ( slurm_msg_t * msg )
 	/* check to see if build_data has changed */	
 	if ( last_time_msg -> last_update >= init_time )
 	{
-		info ("slurmctld_req: dump_build, no change, time=%ld", 
+		info ("dump_build, no change, time=%ld", 
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
 	}
@@ -230,7 +230,7 @@ slurm_rpc_dump_build ( slurm_msg_t * msg )
 		response_msg . data = & build_tbl ;
 
 		/* send message */
-		info ("slurmctld_req: dump_build time=%ld", (long) (clock () - start_time));
+		info ("dump_build time=%ld", (long) (clock () - start_time));
 		slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
 	}
 }
@@ -250,7 +250,7 @@ slurm_rpc_dump_jobs ( slurm_msg_t * msg )
 
 	if ( last_time_msg -> last_update >= last_job_update )
 	{
-		info ("slurmctld_req: dump_job, no change, time=%ld", 
+		info ("dump_job, no change, time=%ld", 
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
 	}
@@ -266,7 +266,7 @@ slurm_rpc_dump_jobs ( slurm_msg_t * msg )
 
 		/* send message */
 		slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
-		info ("slurmctld_req: dump_job, size=%d, time=%ld", 
+		info ("dump_job, size=%d, time=%ld", 
 		      dump_size, (long) (clock () - start_time));
 		if (dump)
 			xfree (dump);
@@ -287,7 +287,7 @@ slurm_rpc_dump_nodes ( slurm_msg_t * msg )
 
 	if ( last_time_msg -> last_update >= last_node_update )
 	{
-		info ("slurmctld_req: dump_node, no change, time=%ld", 
+		info ("dump_node, no change, time=%ld", 
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
 	}
@@ -303,7 +303,7 @@ slurm_rpc_dump_nodes ( slurm_msg_t * msg )
 
 		/* send message */
 		slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
-		info ("slurmctld_req: dump_node, size=%d, time=%ld", 
+		info ("dump_node, size=%d, time=%ld", 
 		      dump_size, (long) (clock () - start_time));
 		if (dump)
 			xfree (dump);
@@ -324,7 +324,7 @@ slurm_rpc_dump_partitions ( slurm_msg_t * msg )
 
 	if ( last_time_msg -> last_update >= last_part_update )
 	{
-		info ("slurmctld_req: dump_part, no change, time=%ld", 
+		info ("dump_part, no change, time=%ld", 
 			(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_NO_CHANGE_IN_DATA );
 	}
@@ -340,7 +340,7 @@ slurm_rpc_dump_partitions ( slurm_msg_t * msg )
 
 		/* send message */
 		slurm_send_node_msg( msg -> conn_fd , &response_msg ) ;
-		info ("slurmctld_req: dump_part, size=%d, time=%ld", 
+		info ("dump_part, size=%d, time=%ld", 
 		      dump_size, (long) (clock () - start_time));
 		if (dump)
 			xfree (dump);
@@ -363,13 +363,13 @@ slurm_rpc_job_cancel ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		info ("slurmctld_req: job_cancel error %d for %u, time=%ld",
+		info ("job_cancel error %d for %u, time=%ld",
 			error_code, job_id_msg->job_id, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: job_cancel success for %u, time=%ld",
+		info ("job_cancel success for %u, time=%ld",
 			job_id_msg->job_id, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
 	}
@@ -393,14 +393,14 @@ slurm_rpc_update_job ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		error ("slurmctld_req: update error %d on job id %u, time=%ld",
+		error ("update error %d on job id %u, time=%ld",
 				error_code, job_desc_msg->job_id, 
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: updated job id %u, time=%ld",
+		info ("updated job id %u, time=%ld",
 				job_desc_msg->job_id, 
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
@@ -426,14 +426,14 @@ slurm_rpc_update_node ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		error ("slurmctld_req: update error %d on node %s, time=%ld",
+		error ("update error %d on node %s, time=%ld",
 				error_code, update_node_msg_ptr->node_names, 
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: updated node %s, time=%ld",
+		info ("updated node %s, time=%ld",
 				update_node_msg_ptr->node_names, 
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
@@ -459,13 +459,13 @@ slurm_rpc_update_partition ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		error ("slurmctld_req: update error %d on partition %s, time=%ld",
+		error ("update error %d on partition %s, time=%ld",
 				error_code, part_desc_ptr->name, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: updated partition %s, time=%ld",
+		info ("updated partition %s, time=%ld",
 				part_desc_ptr->name, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
 	}
@@ -500,13 +500,13 @@ slurm_rpc_submit_batch_job ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		info ("slurmctld_req: job_submit error %d, time=%ld",
+		info ("job_submit error %d, time=%ld",
 				error_code, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: job_submit success for id=%u, time=%ld",
+		info ("job_submit success for id=%u, time=%ld",
 				job_id, (long) (clock () - start_time));
 		/* send job_ID */
 		submit_msg . job_id = job_id ;
@@ -526,26 +526,29 @@ void slurm_rpc_allocate_resources ( slurm_msg_t * msg , uint8_t immediate )
 	clock_t start_time;
 	job_desc_msg_t * job_desc_msg = ( job_desc_msg_t * ) msg-> data ;
 	char * node_list_ptr = NULL;
-	int job_id ;
+	uint16_t num_cpu_groups = 0;
+	uint32_t * cpus_per_node = NULL, * cpu_count_reps = NULL;
+	uint32_t job_id ;
 	resource_allocation_response_msg_t alloc_msg ;
 
 	start_time = clock ();
 
 	/* do RPC call */
 	dump_job_desc(job_desc_msg);
-	error_code = job_allocate(job_desc_msg, 
-			&job_id, &node_list_ptr, immediate , false );
+	error_code = job_allocate(job_desc_msg, &job_id, 
+			&node_list_ptr, &num_cpu_groups, &cpus_per_node, &cpu_count_reps, 
+			immediate , false );
 
 	/* return result */
 	if (error_code)
 	{
-		info ("slurmctld_req: error %d allocating resources, time=%ld",
+		info ("error %d allocating resources, time=%ld",
 				error_code,  (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: allocated nodes %s, JobId=%u, time=%ld",
+		info ("allocated nodes %s, JobId=%u, time=%ld",
 				node_list_ptr , job_id , 	
 				(long) (clock () - start_time));
 		
@@ -553,13 +556,15 @@ void slurm_rpc_allocate_resources ( slurm_msg_t * msg , uint8_t immediate )
 
 		alloc_msg . job_id = job_id ;
 		alloc_msg . node_list = node_list_ptr ;
-		alloc_msg.num_cpu_groups = 0;
-		response_msg . msg_type = ( immediate ) ? RESPONSE_IMMEDIATE_RESOURCE_ALLOCATION : RESPONSE_RESOURCE_ALLOCATION ;
+		alloc_msg . num_cpu_groups = num_cpu_groups;
+		alloc_msg . cpus_per_node  = cpus_per_node;
+		alloc_msg . cpu_count_reps = cpu_count_reps;
+		response_msg . msg_type = ( immediate ) ? 
+				RESPONSE_IMMEDIATE_RESOURCE_ALLOCATION : RESPONSE_RESOURCE_ALLOCATION ;
 		response_msg . data =  & alloc_msg ;
 
 		slurm_send_node_msg ( msg->conn_fd , & response_msg ) ;
 	}
-	xfree ( node_list_ptr );
 }
 
 /* slurm_rpc_job_will_run - determine if job with given configuration can be initiated now */
@@ -568,32 +573,33 @@ void slurm_rpc_job_will_run ( slurm_msg_t * msg )
 	/* init */
 	int error_code;
 	clock_t start_time;
+	uint16_t num_cpu_groups = 0;
+	uint32_t * cpus_per_node = NULL, * cpu_count_reps = NULL;
 	uint32_t job_id ;
 	job_desc_msg_t * job_desc_msg = ( job_desc_msg_t * ) msg-> data ;
-	char * node_name_ptr = NULL;
+	char * node_list_ptr = NULL;
 
 	start_time = clock ();
 
 	/* do RPC call */
 	dump_job_desc(job_desc_msg);
-	error_code = job_allocate(job_desc_msg,
-			&job_id, &node_name_ptr, false , true );
+	error_code = job_allocate(job_desc_msg, &job_id, 
+			&node_list_ptr, &num_cpu_groups, &cpus_per_node, &cpu_count_reps, 
+			false , true );
 	
 	/* return result */
 	if (error_code)
 	{
-		info ("slurmctld_req: job_will_run error %d, time=%ld",
+		info ("job_will_run error %d, time=%ld",
 				error_code, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: job_will_run success for , time=%ld",
+		info ("job_will_run success, time=%ld",
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
 	}
-	xfree ( node_name_ptr ) ;
-
 }
 
 /* slurm_rpc_reconfigure_controller - re-initialize controller from configuration files */
@@ -615,13 +621,13 @@ slurm_rpc_reconfigure_controller ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		error ("slurmctld_req: reconfigure error %d, time=%ld",
+		error ("reconfigure error %d, time=%ld",
 				error_code, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: reconfigure completed successfully, time=%ld", 
+		info ("reconfigure completed successfully, time=%ld", 
 				(long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
 	}
@@ -654,13 +660,13 @@ slurm_rpc_node_registration ( slurm_msg_t * msg )
 	/* return result */
 	if (error_code)
 	{
-		error ("slurmctld_req: validate_node_specs error %d for %s, time=%ld",
+		error ("validate_node_specs error %d for %s, time=%ld",
 			error_code, node_reg_stat_msg -> node_name, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , error_code );
 	}
 	else
 	{
-		info ("slurmctld_req: validate_node_specs for %s, time=%ld",
+		info ("validate_node_specs for %s, time=%ld",
 			node_reg_stat_msg -> node_name, (long) (clock () - start_time));
 		slurm_send_rc_msg ( msg , SLURM_SUCCESS );
 	}
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index 2b5a48f75bb..200c3db6429 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -231,8 +231,6 @@ delete_job_details (struct job_record *job_entry)
 		xfree(job_entry->details->req_nodes);
 	if (job_entry->details->req_node_bitmap)
 		bit_free(job_entry->details->req_node_bitmap);
-	if (job_entry->details->node_list)
-		xfree(job_entry->details->node_list);
 	if (job_entry->details->features)
 		xfree(job_entry->details->features);
 	xfree(job_entry->details);
@@ -263,8 +261,7 @@ delete_job_record (uint32_t job_id)
 		if (job_record_point->job_id != job_id)
 			continue;
 
-		if (job_record_point->details) 
-			xfree (job_record_point->details);
+		delete_job_details (job_record_point);
 		xfree (job_record_point);
 		list_remove (job_record_iterator);
 		break;
@@ -358,45 +355,62 @@ init_job_conf ()
 
 
 /*
- * job_allocate - create job_records for job with supplied specification and 
- *	allocate nodes for it. if the job can not be immediately allocated nodes
+ * job_allocate - parse the supplied job specification, create job_records for it, 
+ *	and allocate nodes for it. if the job can not be immediately allocated 
+ *	nodes, EAGAIN will be returned
  * input: job_specs - job specifications
  *	new_job_id - location for storing new job's id
  *	node_list - location for storing new job's allocated nodes
- *	immediate - either allocate nodes immediately or return failure
- *	will_run - test if job allocation would succeed, don't actually allocate nodes
+ *	num_cpu_groups - location to store number of cpu groups
+ *	cpus_per_node - location to store pointer to array of numbers of cpus on each node allocated
+ *	cpu_count_reps - location to store pointer to array of numbers of consecutive nodes having
+ *				 same cpu count
  * output: new_job_id - the job's ID
+ *	num_cpu_groups - number of cpu groups (elements in cpus_per_node and cpu_count_reps)
+ *	cpus_per_node - pointer to array of numbers of cpus on each node allocated
+ *	cpu_count_reps - pointer to array of numbers of consecutive nodes having same cpu count
  *	node_list - list of nodes allocated to the job
- *	returns 0 on success, otherwise an error code from common/slurm_protocol_errno.h
+ *	returns 0 on success, EINVAL if specification is invalid, 
+ *		EAGAIN if higher priority jobs exist
  * globals: job_list - pointer to global job list 
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition 
- *	last_job_update - time of last job table update
- * NOTE: the calling program must xfree the memory pointed to by node_list
+ * NOTE: If allocating nodes lx[0-7] to a job and those nodes have cpu counts of 
+ *	 4, 4, 4, 4, 8, 8, 4, 4 then num_cpu_groups=3, cpus_per_node={4,8,4} and
+ *	cpu_count_reps={4,2,2}
  */
 
 int
 immediate_job_launch (job_desc_msg_t * job_specs, uint32_t *new_job_id, char **node_list, 
-                         int immediate , int will_run )
+		uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, 
+		int immediate , int will_run )
 {
-	return job_allocate (job_specs, new_job_id, node_list, true , false );
+	return job_allocate (job_specs, new_job_id, node_list, 
+				num_cpu_groups, cpus_per_node, cpu_count_reps, 
+				true , false );
 }
 
 int 
 will_job_run (job_desc_msg_t * job_specs, uint32_t *new_job_id, char **node_list, 
-                  int immediate , int will_run )
+		uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, 
+		int immediate , int will_run )
 {
-	return job_allocate (job_specs, new_job_id, node_list, false , true );
+	return job_allocate (job_specs, new_job_id, node_list, 
+				num_cpu_groups, cpus_per_node, cpu_count_reps, 
+				false , true );
 }
 
-int
-job_allocate (job_desc_msg_t * job_specs, uint32_t *new_job_id, char **node_list, 
-	      int immediate , int will_run )
+int 
+job_allocate (job_desc_msg_t  *job_specs, uint32_t *new_job_id, char **node_list, 
+	uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, 
+	int immediate, int will_run)
 {
 	int error_code;
 	struct job_record *job_ptr;
 
+	*num_cpu_groups = 0;
 	node_list[0] = NULL;
+	cpus_per_node[0] = cpu_count_reps[0] = NULL;
 
 	error_code = job_create (job_specs, new_job_id, 1, will_run, &job_ptr);
 	if (error_code || will_run)
@@ -434,8 +448,10 @@ job_allocate (job_desc_msg_t * job_specs, uint32_t *new_job_id, char **node_list
 		job_ptr->end_time  = 0;
 	}
 
-	node_list[0] = xmalloc (strlen(job_ptr->nodes) + 1);
-	strcpy(node_list[0], job_ptr->nodes);
+	node_list[0]      = job_ptr->nodes;
+	*num_cpu_groups   = job_ptr->num_cpu_groups;
+	cpus_per_node[0]  = job_ptr->cpus_per_node;
+	cpu_count_reps[0] = job_ptr->cpu_count_reps;
 	return 0;
 }
 
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index c3fd4acb93b..7a686d3e09c 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -346,57 +346,6 @@ bitmap2node_name (bitstr_t *bitmap, char **node_list)
 }
 
 
-/*
- * build_node_list - build a node_list for a job including processor 
-  *	count on the node (e.g. "lx01[4],lx02[4],...")
- *	task distributions on the nodes
- * input: bitmap - bitmap of nodes to use
- *	node_list - place to store node list
- *	total_procs - place to store count of total processors allocated
- * output: node_list - comma separated list of nodes on which the tasks 
- *		are to be initiated
- *	total_procs - count of total processors allocated
- * global: node_record_table_ptr - pointer to global node table
- * NOTE: the storage at node_list must be xfreed by the caller
- */
-void 
-build_node_list (bitstr_t *bitmap, char **node_list, uint32_t *total_procs)
-{
-	int i, node_list_size;
-	int sum_procs;
-	char tmp_str[MAX_NAME_LEN+10];
-
-	*total_procs = 0;
-	node_list[0] = NULL;
-	node_list_size = 0;
-	if (bitmap == NULL)
-		fatal ("build_node_list: bitmap is NULL");
-
-	node_list[0] = xmalloc (BUF_SIZE);
-	strcpy (node_list[0], "");
-
-	sum_procs = 0;
- 	for (i = 0; i < node_record_count; i++) {
-		if (bit_test (bitmap, i) != 1)
-			continue;
-		sprintf (tmp_str, "%s[%d]", 
-			node_record_table_ptr[i].name,
-			node_record_table_ptr[i].cpus);
-		if (node_list_size <
-		    (strlen (node_list[0]) + (MAX_NAME_LEN+10))) {
-			node_list_size += BUF_SIZE;
-			xrealloc (node_list[0], node_list_size);
-		}
-		if (sum_procs > 0)
-			strcat (node_list[0], ",");
-		strcat (node_list[0], node_record_table_ptr[i].name);
-		sum_procs += node_record_table_ptr[i].cpus;
-	}
-	*total_procs = sum_procs;
-	xrealloc (node_list[0], strlen (node_list[0]) + 1);
-}
-
-
 /*
  * create_config_record - create a config_record entry and set is values to the defaults.
  * output: returns pointer to the config_record
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index 702e44e55cf..e821c2dc33a 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -31,6 +31,8 @@ struct node_set {		/* set of nodes with same configuration */
 	bitstr_t *my_bitmap;
 };
 
+void build_node_details (bitstr_t *node_bitmap, 
+		uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t **cpu_count_reps);
 int pick_best_quadrics (bitstr_t *bitmap, bitstr_t *req_bitmap, int req_nodes,
 		    int req_cpus, int consecutive);
 int pick_best_nodes (struct node_set *node_set_ptr, int node_set_size,
@@ -625,7 +627,7 @@ pick_best_nodes (struct node_set *node_set_ptr, int node_set_size,
 						contiguous);
 			if ((pick_code == 0) && (max_nodes != INFINITE)
 			    && (bit_set_count (avail_bitmap) > max_nodes)) {
-				info ("pick_best_nodes: too many nodes selected %u of %u",
+				info ("pick_best_nodes: too many nodes selected %u, partition maximum is %u",
 					bit_set_count (avail_bitmap), max_nodes);
 				error_code = EINVAL;
 				break;
@@ -851,9 +853,10 @@ select_nodes (struct job_record *job_ptr, int test_only)
 
 	/* assign the nodes and stage_in the job */
 	bitmap2node_name (req_bitmap, &(job_ptr->nodes));
-	build_node_list (req_bitmap, 
-		&job_ptr->details->node_list, 
-		&job_ptr->details->total_procs);
+	build_node_details (req_bitmap, 
+		&(job_ptr->num_cpu_groups),
+		&(job_ptr->cpus_per_node),
+		&(job_ptr->cpu_count_reps));
 	allocate_nodes (req_bitmap);
 	job_ptr->node_bitmap = req_bitmap;
 	req_bitmap = NULL;
@@ -883,6 +886,59 @@ select_nodes (struct job_record *job_ptr, int test_only)
 }
 
 
+/*
+ * build_node_details - given a bitmap, report the number of cpus per node and their distribution
+ * input: bitstr_t *node_bitmap - the map of nodes
+ * output: num_cpu_groups - element count in arrays cpus_per_node and cpu_count_reps
+ *	cpus_per_node - array of cpus per node allocated
+ *	cpu_count_reps - array of consecutive nodes with same cpu count
+ * NOTE: the arrays cpus_per_node and cpu_count_reps must be xfreed by the caller
+ */
+void 
+build_node_details (bitstr_t *node_bitmap, 
+		uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t **cpu_count_reps)
+{
+	int array_size, array_pos, i;
+	int first_bit, last_bit;
+
+	*num_cpu_groups = 0;
+	if (node_bitmap == NULL) 
+		return;
+
+	first_bit = bit_ffs(node_bitmap);
+	last_bit  = bit_fls(node_bitmap);
+	array_pos = -1;
+
+	/* assume relatively homogeneous array for array allocations */
+	/* we can grow or shrink the arrays as needed */
+	array_size = (last_bit - first_bit) / 100 + 2;
+	cpus_per_node[0]  = xmalloc (sizeof(uint32_t *) * array_size);
+	cpu_count_reps[0] = xmalloc (sizeof(uint32_t *) * array_size);
+
+	for (i = first_bit; i <= last_bit; i++) {
+		if (bit_test (node_bitmap, i) != 1)
+			continue;
+		if ((array_pos == -1) ||
+		    (cpus_per_node[0][array_pos] != node_record_table_ptr[i].cpus)) {
+			array_pos++;
+			if (array_pos >= array_size) { /* grow arrays */
+				array_size *= 2;
+				xrealloc (cpus_per_node[0],  (sizeof(uint32_t *) * array_size));
+				xrealloc (cpu_count_reps[0], (sizeof(uint32_t *) * array_size));
+			}
+			cpus_per_node [0][array_pos] = node_record_table_ptr[i].cpus;
+			cpu_count_reps[0][array_pos] = 1;
+		}
+		else {
+			cpu_count_reps[0][array_pos]++;
+		}
+	}
+	array_size = array_pos + 1;
+	*num_cpu_groups = array_size;
+	xrealloc (cpus_per_node[0],  (sizeof(uint32_t *) * array_size));
+	xrealloc (cpu_count_reps[0], (sizeof(uint32_t *) * array_size));
+}
+
 /*
  * valid_features - determine if the requested features are satisfied by those available
  * input: requested - requested features (by a job)
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 1b4aefecdf1..7f144afccb7 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -1,17 +1,28 @@
-/* 
- * slurmctld.h - definitions for slurmcltd use
- *
- * NOTE: the job, node, and partition specifications are all of the 
- * same basic format:
- * if the first character of a line is "#" then it is a comment.
- * place all information for a single node, partition, or job on a 
- *    single line. 
- * space delimit collection of keywords and values and separate
- *    the keyword from value with an equal sign (e.g. "cpus=3"). 
- * list entries should be comma separated (e.g. "nodes=lx01,lx02").
- * 
- * see the slurm administrator guide for more details.
- */
+/*****************************************************************************\
+ * slurmctld.h - definitions of functions and structures for slurmctld use
+ *****************************************************************************
+ *  Copyright (C) 2002 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Moe Jette <jette@llnl.gov> et al.
+ *  UCRL-CODE-2002-040.
+ *  
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *  
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *  
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE.  See the GNU General Public License for more
+ *  details.
+ *  
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  59 Temple Place, Suite 330, Boston, MA  02111-1307  USA.
+\*****************************************************************************/
 
 #ifndef _HAVE_SLURM_H
 #define _HAVE_SLURM_H
@@ -129,6 +140,7 @@ extern time_t last_step_update;	/* time of last update to job steps */
 
 extern int job_count;			/* number of jobs in the system */
 
+/* job_details - specification of a job's constraints, not required after initiation */
 struct job_details {
 	uint32_t magic;			/* magic cookie to test data integrity */
 	uint32_t num_procs;		/* minimum number of processors */
@@ -145,7 +157,6 @@ struct job_details {
 	char *job_script;		/* name of job script to execute */
 	uint16_t procs_per_task;	/* processors required per task */
 	uint32_t total_procs;		/* total number of allocated processors, for accounting */
-	char *node_list;		/* comma separated assigned node list (by task) */
 	time_t submit_time;		/* time of submission */
 };
 
@@ -165,6 +176,9 @@ struct job_record {
 	uint32_t priority;		/* relative priority of the job */
 	struct job_details *details;	/* job details (set until job terminates) */
 	uint16_t next_step_id;		/* next step id to be used */
+	uint16_t num_cpu_groups;	/* element count in arrays cpus_per_node and cpu_count_reps */
+	uint32_t *cpus_per_node;	/* array of cpus per node allocated */
+	uint32_t *cpu_count_reps;	/* array of consecutive nodes with same cpu count */
 };
 
 struct 	step_record {
@@ -216,21 +230,6 @@ extern void bitmap2node_name (bitstr_t *bitmap, char **node_list);
  */
 extern enum task_dist block_or_cycle (char *in_string);
 
-/*
- * build_node_list - build a node_list for a job including processor 
- *	count on the node (e.g. "lx01[4],lx02[4],...")
- * input: bitmap - bitmap of nodes to use
- *	node_list - place to store node list
- *	total_procs - place to store count of total processors allocated
- * output: node_list - comma separated list of nodes on which the tasks 
- *		are to be initiated
- *	total_procs - count of total processors allocated
- * global: node_record_table_ptr - pointer to global node table
- * NOTE: the storage at node_list must be xfreed by the caller
- */
-extern void  build_node_list (bitstr_t *bitmap, char **node_list, 
-	uint32_t *total_procs);
-
 /*
  * count_cpus - report how many cpus are associated with the identified nodes 
  * input: bitmap - a node bitmap
@@ -421,23 +420,9 @@ extern int init_slurm_conf ();
 
 extern int  is_key_valid (void * key);
 
-/*
- * job_allocate - parse the suppied job specification, create job_records for it, 
- *	and allocate nodes for it. if the job can not be immediately allocated 
- *	nodes, EAGAIN will be returned
- * input: job_specs - job specifications
- *	new_job_id - location for storing new job's id
- *	node_list - location for storing new job's allocated nodes
- * output: new_job_id - the job's ID
- *	node_list - list of nodes allocated to the job
- *	returns 0 on success, EINVAL if specification is invalid, 
- *		EAGAIN if higher priority jobs exist
- * globals: job_list - pointer to global job list 
- *	list_part - global list of partition info
- *	default_part_loc - pointer to default partition 
- * NOTE: the calling program must xfree the memory pointed to by node_list
- */
-extern int job_allocate (job_desc_msg_t  *job_specs, uint32_t *new_job_id, char **node_list, int immediate, int will_run);
+extern int job_allocate (job_desc_msg_t  *job_specs, uint32_t *new_job_id, char **node_list, 
+	uint16_t * num_cpu_groups, uint32_t ** cpus_per_node, uint32_t ** cpu_count_reps, 
+	int immediate, int will_run);
 
 /* 
  * job_cancel - cancel the specified job
diff --git a/testsuite/slurm_unit/api/manual/allocate-tst.c b/testsuite/slurm_unit/api/manual/allocate-tst.c
index 196ca989770..183957567c3 100644
--- a/testsuite/slurm_unit/api/manual/allocate-tst.c
+++ b/testsuite/slurm_unit/api/manual/allocate-tst.c
@@ -31,8 +31,10 @@
 #include <src/api/slurm.h>
 #include <testsuite/dejagnu.h>
 
+void report_results(resource_allocation_response_msg_t* resp_msg);
+
 /* main is used here for testing purposes only */
-	int 
+int 
 main (int argc, char *argv[])
 {
 	int error_code, job_count, max_jobs;
@@ -60,18 +62,17 @@ main (int argc, char *argv[])
 	job_mesg. time_limit = 200;
 	job_mesg. num_procs = 1000;
 	job_mesg. num_nodes = 400;
+	job_mesg. num_nodes = 4096;
 	job_mesg. user_id = 1500;
 
 
 	error_code = slurm_allocate_resources ( &job_mesg , &resp_msg , false ); 
 	if (error_code)
 		printf ("allocate error %d\n", error_code);
-	else {
-		printf ("allocate nodes %s to job %u\n", resp_msg->node_list, resp_msg->job_id);
-	}
-	job_count = 1;
+	else
+		report_results(resp_msg);
 
-	for ( ; job_count <max_jobs;  job_count++) {
+	for (job_count = 1 ; job_count <max_jobs;  job_count++) {
 		slurm_init_job_desc_msg( &job_mesg );
 		job_mesg. contiguous = 1;
 		job_mesg. groups = ("students,employee\0");
@@ -87,6 +88,8 @@ main (int argc, char *argv[])
 		job_mesg. time_limit = 200;
 		job_mesg. num_procs = 4000;
 		job_mesg. user_id = 1500;
+		/* job_mesg. num_nodes = 4096; */
+		job_mesg. contiguous = 0;
 
 		/* the string also had Immediate */
 		error_code = slurm_allocate_resources ( &job_mesg , &resp_msg , true ); 
@@ -94,10 +97,8 @@ main (int argc, char *argv[])
 			printf ("allocate error %d\n", error_code);
 			break;
 		}
-		else {
-			printf ("allocate nodes %s to job %u\n", 
-				resp_msg->node_list, resp_msg->job_id);
-		}
+		else 
+			report_results(resp_msg);
 	}
 
 	for ( ; job_count <max_jobs;  job_count++) {
@@ -111,10 +112,8 @@ main (int argc, char *argv[])
 			printf ("allocate error %d\n", error_code);
 			break;
 		}
-		else {
-			printf ("allocate nodes %s to job %u\n", 
-				resp_msg->node_list, resp_msg->job_id);
-		}
+		else 
+			report_results(resp_msg);
 	}
 
 	for ( ; job_count <max_jobs;  job_count++) {
@@ -128,11 +127,27 @@ main (int argc, char *argv[])
 			printf ("allocate error %d\n", error_code);
 			break;
 		}
-		else {
-			printf ("allocate nodes %s to job %u\n", 
-				resp_msg->node_list, resp_msg->job_id);
-		}
+		else 
+			report_results(resp_msg);
 	}
 
 	return (0);
 }
+
+/* report results of successful job allocation */
+void
+report_results(resource_allocation_response_msg_t* resp_msg)
+{
+	int i;
+
+	printf ("allocate nodes %s to job %u\n", resp_msg->node_list, resp_msg->job_id);
+	if (resp_msg->num_cpu_groups > 0) {
+		printf ("processor counts: ");
+		for (i=0; i<resp_msg->num_cpu_groups; i++) {
+			if (i > 0)
+				printf(", ");
+			printf ("%u(x%u)", resp_msg->cpus_per_node[i], resp_msg->cpu_count_reps[i]);
+		}
+		printf ("\n");
+	}
+}
-- 
GitLab