From c488c547acb2745e243565b949ee30abfa464f3e Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 28 May 2002 23:57:33 +0000
Subject: [PATCH] Changed job_id to 32 bits per DPCS requirement. Jobs without
 job_id specified at submit time start with a value of 1<<16 and DPCS uses
 values 1 to 0xffffffff. This precludes effective use of a max job_id in the
 search function, which was able to quickly indicate that a job_id requested by
 DPCS was not already in use. I added a job_id hash table to restore quick job
 find operations. Added check of packstr function to ensure string would not go
 over 64k bytes, which is the limit for the protocol used.

---
 src/common/pack.h              |   5 +-
 src/scontrol/scontrol.c        |   4 +-
 src/slurmctld/controller.c     |   8 +-
 src/slurmctld/job_mgr.c        | 131 +++++++++++++++++++++++----------
 src/slurmctld/node_scheduler.c |   2 +-
 src/slurmctld/parse_spec.c     |  57 ++++++++++++++
 src/slurmctld/slurmctld.h      |  29 ++++----
 src/slurmctld/step_mgr.c       |  10 +--
 8 files changed, 181 insertions(+), 65 deletions(-)

diff --git a/src/common/pack.h b/src/common/pack.h
index 24d86d1b3f2..88efb6f4588 100644
--- a/src/common/pack.h
+++ b/src/common/pack.h
@@ -72,9 +72,10 @@ void	_unpackmem_xmalloc(char **valp, uint16_t *size_valp, void **bufp, int *lenp
 } while (0)
 
 #define packstr(str,bufp,lenp) do {			\
-	uint16_t _size;					\
-	_size = (uint16_t)(str ? strlen(str)+1 : 0);	\
+	uint32_t _size;					\
+	_size = (uint32_t)(str ? strlen(str)+1 : 0);	\
         assert(_size == 0 || str != NULL);		\
+	assert(_size <= 0xffff);	/* 64k protocol limit */	\
 	assert((bufp) != NULL && *(bufp) != NULL);	\
         assert((lenp) != NULL);				\
         assert(*(lenp) >= (sizeof(_size)+_size));	\
diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c
index f623ccc5c2c..2c455eada78 100644
--- a/src/scontrol/scontrol.c
+++ b/src/scontrol/scontrol.c
@@ -278,7 +278,7 @@ void
 print_job (char * job_id_str) 
 {
 	int error_code, i;
-	uint16_t job_id = 0;
+	uint32_t job_id = 0;
 	static struct job_buffer *old_job_buffer_ptr = NULL;
 	struct job_buffer *job_buffer_ptr = NULL;
 	struct job_table *job_ptr = NULL;
@@ -306,7 +306,7 @@ printf("time=%lu\n",(long)old_job_buffer_ptr->last_update);
 		printf ("last_update_time=%ld\n", (long) job_buffer_ptr->last_update);
 
 	if (job_id_str)
-		job_id = (uint16_t) atoi (job_id_str);
+		job_id = (uint32_t) strtol (job_id_str, (char **)NULL, 10);
 
 	job_ptr = job_buffer_ptr->job_table_ptr;
 	for (i = 0; i < job_buffer_ptr->job_count; i++) {
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index f5f1871c069..3a111801d31 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -167,7 +167,7 @@ slurmctld_req (int sockfd) {
 	char in_line[BUF_SIZE], node_name[MAX_NAME_LEN];
 	int cpus, real_memory, tmp_disk;
 	char *node_name_ptr, *part_name, *time_stamp;
-	uint16_t job_id;
+	uint32_t job_id;
 	time_t last_update;
 	clock_t start_time;
 	char *dump;
@@ -348,7 +348,7 @@ slurmctld_req (int sockfd) {
 
 	/* JobCancel - cancel a slurm job or reservation */
 	else if (strncmp ("JobCancel", in_line, 9) == 0) {
-		job_id = (uint16_t) atoi (&in_line[10]);
+		job_id = (uint32_t) strtol (&in_line[10], (char **)NULL, 10);
 		error_code = job_cancel (job_id);
 		if (error_code)
 			info ("slurmctld_req: job_cancel error %d, time=%ld",
@@ -364,7 +364,9 @@ slurmctld_req (int sockfd) {
 
 	/* JobSubmit - submit a job to the slurm queue */
 	else if (strncmp ("JobSubmit", in_line, 9) == 0) {
-		error_code = job_create(&in_line[9], &job_id, 0);	/* skip "JobSubmit" */
+		struct job_record *job_rec_ptr;
+		error_code = job_create(&in_line[9], &job_id, 0, 
+				&job_rec_ptr);	/* skip "JobSubmit" */
 		if (error_code)
 			info ("slurmctld_req: job_submit error %d, time=%ld",
 				 error_code, (long) (clock () - start_time));
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index c14e3189573..9e1a4b2b4c9 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -22,13 +22,15 @@
 #define BUF_SIZE 1024
 #define MAX_STR_PACK 128
 
-int max_job_id = 0;			/* highest job id, for quicker searches */
 int job_count;				/* job's in the system */
 List job_list = NULL;			/* job_record list */
 time_t last_job_update;			/* time of last update to job records */
 static pthread_mutex_t job_mutex = PTHREAD_MUTEX_INITIALIZER;	/* lock for job info */
 char *job_state_string[] =
 	{ "PENDING", "STAGE_IN", "RUNNING", "STAGE_OUT", "COMPLETED", "FAILED", "TIME_OUT", "END" };
+static struct job_record *job_hash[MAX_JOB_COUNT];
+static struct job_record *job_hash_over[MAX_JOB_COUNT];
+static int max_hash_over = 0;
 
 void list_delete_job (void *job_entry);
 int list_find_job_id (void *job_entry, void *key);
@@ -208,21 +210,37 @@ delete_job_details (struct job_record *job_entry)
  * output: return 0 on success, errno otherwise
  * global: job_list - pointer to global job list
  *	last_job_update - time of last job table update
+ * NOTE: Slow as currently constructed due to singly linked list and linear search.
+ *	This would be faster with hash table and doubly linked list. We intend to 
+ *	purge entries through purge_old_job() anyway.
  */
 int 
-delete_job_record (uint16_t job_id) 
+delete_job_record (uint32_t job_id) 
 {
-	int i;
+	ListIterator job_record_iterator;
+	struct job_record *job_record_point;
 
 	last_job_update = time (NULL);
+	job_record_iterator = list_iterator_create (job_list);		
 
-	i = list_delete_all (job_list, &list_find_job_id, &job_id);
-	if (i == 0) {
+	while ((job_record_point = 
+		    (struct job_record *) list_next (job_record_iterator))) {
+		if (job_record_point->job_id != job_id)
+			continue;
+
+		if (job_record_point->details) 
+			xfree (job_record_point->details);
+		xfree (job_record_point);
+		list_remove (job_record_iterator);
+		break;
+	}
+	list_iterator_destroy (job_record_iterator);
+
+	if (job_record_point == NULL) {
 		error ("delete_job_record: attempt to delete non-existent job %u", 
 			job_id);
 		return ENOENT;
-	}  
-
+	} 
 	return 0;
 }
 
@@ -232,19 +250,26 @@ delete_job_record (uint16_t job_id)
  * input: job_id - requested job's id
  * output: pointer to the job's record, NULL on error
  * global: job_list - global job list pointer
+ *	job_hash, job_hash_over, max_hash_over - hash table into job records
  */
 struct job_record *
-find_job_record(uint16_t job_id) 
+find_job_record(uint32_t job_id) 
 {
-	struct job_record *job_ptr;
+	int i;
 
-	if (job_id > max_job_id)
-		return NULL;
+	/* First try to find via hash table */
+	if (job_hash[job_id % MAX_JOB_COUNT] &&
+	    job_hash[job_id % MAX_JOB_COUNT]->job_id == job_id)
+		return job_hash[job_id % MAX_JOB_COUNT];
+
+	/* linear search of the overflow hash table */
+	for (i=0; i<max_hash_over; i++) {
+		if (job_hash_over[i] != NULL &&
+		    job_hash_over[i]->job_id == job_id)
+			return job_hash_over[i];
+	}
 
-	job_ptr = list_find_first (job_list, &list_find_job_id, &job_id);
-	if ((job_ptr != NULL) && (job_ptr->magic != JOB_MAGIC))
-		fatal ("job_list invalid");
-	return job_ptr;
+	return NULL;
 }
 
 
@@ -288,19 +313,18 @@ init_job_conf ()
  * NOTE: the calling program must xfree the memory pointed to by node_list
  */
 int
-job_allocate (char *job_specs, uint16_t *new_job_id, char **node_list)
+job_allocate (char *job_specs, uint32_t *new_job_id, char **node_list)
 {
 	int error_code, i;
 	struct job_record *job_ptr;
 
 	node_list[0] = NULL;
 
-	error_code = job_create (job_specs, new_job_id, 1);
+	error_code = job_create (job_specs, new_job_id, 1, &job_ptr);
 	if (error_code)
 		return error_code;
-	job_ptr = find_job_record (*new_job_id);
 	if (job_ptr == NULL)
-		fatal ("job_allocate allocated job %u lacks record", 
-			new_job_id);
+		fatal ("job_allocate: allocated job %u lacks record", 
+			*new_job_id);	/* new_job_id is a pointer; log the id itself */
 
 /*	if (top_priority(new_job_id) != 0)
@@ -325,7 +349,7 @@ job_allocate (char *job_specs, uint16_t *new_job_id, char **node_list)
  *	last_job_update - time of last job table update
  */
 int
-job_cancel (uint16_t job_id) 
+job_cancel (uint32_t job_id) 
 {
 	struct job_record *job_ptr;
 
@@ -363,21 +387,26 @@ job_cancel (uint16_t job_id)
  * job_create - parse the suppied job specification and create job_records for it
  * input: job_specs - job specifications
  *	new_job_id - location for storing new job's id
+ *	job_rec_ptr - place to park pointer to the job (or NULL)
  * output: new_job_id - the job's ID
  *	returns 0 on success, EINVAL if specification is invalid
  *	allocate - if set, job allocation only (no script required)
+ *	job_rec_ptr - pointer to the job (if not passed a NULL)
  * globals: job_list - pointer to global job list 
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition 
+ *	job_hash, job_hash_over, max_hash_over - hash table into job records
  */
 int
-job_create (char *job_specs, uint16_t *new_job_id, int allocate)
+job_create (char *job_specs, uint32_t *new_job_id, int allocate, 
+	    struct job_record **job_rec_ptr)
 {
 	char *req_features, *req_node_list, *job_name, *req_group;
 	char *req_partition, *script;
 	int contiguous, req_cpus, req_nodes, min_cpus, min_memory;
 	int i, min_tmp_disk, time_limit, procs_per_task, user_id;
-	int error_code, dist, job_id, key, shared;
+	int error_code, dist, key, shared;
+	long job_id;
 	struct part_record *part_ptr;
 	struct job_record *job_ptr;
 	struct job_details *detail_ptr;
@@ -387,9 +416,10 @@ job_create (char *job_specs, uint16_t *new_job_id, int allocate)
 	req_features = req_node_list = job_name = req_group = NULL;
 	req_partition = script = NULL;
 	req_bitmap = NULL;
-	contiguous = dist = job_id = req_cpus = req_nodes = min_cpus = NO_VAL;
+	contiguous = dist = req_cpus = req_nodes = min_cpus = NO_VAL;
 	min_memory = min_tmp_disk = time_limit = procs_per_task = NO_VAL;
 	key = shared = user_id = NO_VAL;
+	job_id = (long) NO_VAL;
 	priority = NO_VAL;
 
 	/* setup and basic parsing */
@@ -429,7 +459,7 @@ job_create (char *job_specs, uint16_t *new_job_id, int allocate)
 	if (contiguous == NO_VAL)
 		contiguous = 0;		/* default not contiguous */
 	if (job_id != NO_VAL && 
-	    find_job_record ((uint16_t) job_id)) {
-		info  ("job_create: Duplicate job id %d", job_id);
+	    find_job_record ((uint32_t) job_id)) {
+		info  ("job_create: Duplicate job id %ld", job_id);	/* job_id is long */
 		error_code = EINVAL;
 		goto cleanup;
@@ -556,11 +586,14 @@ job_create (char *job_specs, uint16_t *new_job_id, int allocate)
 	strncpy (job_ptr->partition, part_ptr->name, MAX_NAME_LEN);
 	job_ptr->part_ptr = part_ptr;
 	if (job_id != NO_VAL)
-		job_ptr->job_id = (uint16_t) job_id;
+		job_ptr->job_id = (uint32_t) job_id;
 	else
 		set_job_id(job_ptr);
-	if (job_ptr->job_id > max_job_id)
-		max_job_id = job_ptr->job_id;
+	if (job_hash[job_ptr->job_id % MAX_JOB_COUNT]) 
+		job_hash_over[max_hash_over++] = job_ptr;
+	else
+		job_hash[job_ptr->job_id % MAX_JOB_COUNT] = job_ptr;
+
 	if (job_name) {
 		strcpy (job_ptr->name, job_name);
 		xfree (job_name);
@@ -597,6 +630,8 @@ job_create (char *job_specs, uint16_t *new_job_id, int allocate)
 	/* detail_ptr->total_procs	*leave as NULL pointer for now */
 
 	*new_job_id = job_ptr->job_id;
+	if (job_rec_ptr)
+		*job_rec_ptr = job_ptr;
 	return 0;
 
       cleanup:
@@ -650,11 +685,13 @@ job_unlock ()
  * input: job_entry - pointer to job_record to delete
  * global: job_list - pointer to global job list
  *	job_count - count of job list entries
+ *	job_hash, job_hash_over, max_hash_over - hash table into job records
  */
 void 
 list_delete_job (void *job_entry)
 {
 	struct job_record *job_record_point;
+	int i, j;
 
 	job_record_point = (struct job_record *) job_entry;
 	if (job_record_point == NULL)
@@ -662,6 +699,20 @@ list_delete_job (void *job_entry)
 	if (job_record_point->magic != JOB_MAGIC)
 		fatal ("list_delete_job: passed invalid job pointer");
 
+	if (job_hash[job_record_point->job_id % MAX_JOB_COUNT] == job_record_point)
+		job_hash[job_record_point->job_id % MAX_JOB_COUNT] = NULL;	/* same bucket index job_create uses */
+	else {	/* not in its primary bucket: look in the overflow table */
+		for (i=0; i<max_hash_over; i++) {
+			if (job_hash_over[i] != job_record_point)
+				continue;
+			for (j=i+1; j<max_hash_over; j++) {	/* close the gap */
+				job_hash_over[j-1] = job_hash_over[j];
+			}
+			job_hash_over[--max_hash_over] = NULL;
+			break;
+		}
+	}
+
 	delete_job_details (job_record_point);
 
 	if (job_record_point->nodes)
@@ -681,10 +732,7 @@ list_delete_job (void *job_entry)
 int 
 list_find_job_id (void *job_entry, void *key) 
 {
-	if (*((uint16_t *) key) > max_job_id)
-		return 0;
-
-	if (((struct job_record *) job_entry)->job_id == *((uint16_t *) key))
+	if (((struct job_record *) job_entry)->job_id == *((uint32_t *) key))
 		return 1;
 	return 0;
 }
@@ -801,7 +849,7 @@ pack_job (struct job_record *dump_job_ptr, void **buf_ptr, int *buf_len)
 	char tmp_str[MAX_STR_PACK];
 	struct job_details *detail_ptr;
 
-	pack16  (dump_job_ptr->job_id, buf_ptr, buf_len);
+	pack32  (dump_job_ptr->job_id, buf_ptr, buf_len);
 	pack32  (dump_job_ptr->user_id, buf_ptr, buf_len);
 	pack16  ((uint16_t) dump_job_ptr->job_state, buf_ptr, buf_len);
 	pack32  (dump_job_ptr->time_limit, buf_ptr, buf_len);
@@ -905,7 +953,7 @@ parse_job_specs (char *job_specs, char **req_features, char **req_node_list,
 		 int *contiguous, int *req_cpus, int *req_nodes,
 		 int *min_cpus, int *min_memory, int *min_tmp_disk, int *key,
 		 int *shared, int *dist, char **script, int *time_limit, 
-		 int *procs_per_task, int *job_id, int *priority, 
+		 int *procs_per_task, long *job_id, int *priority, 
 		 int *user_id) {
 	int bad_index, error_code, i;
 	char *temp_specs, *contiguous_str, *dist_str, *shared_str;
@@ -926,7 +974,7 @@ parse_job_specs (char *job_specs, char **req_features, char **req_node_list,
 		"Distribution=", 's', &dist_str, 
 		"Features=", 's', req_features, 
 		"Groups=", 's', req_group, 
-		"JobId=", 'd', job_id, 
+		"JobId=", 'l', job_id, 
 		"JobName=", 's', job_name, 
 		"Key=", 'd', key, 
 		"MinProcs=", 'd', min_cpus, 
@@ -1096,22 +1144,27 @@ reset_job_bitmaps ()
 void
 set_job_id (struct job_record *job_ptr)
 {
-	static uint16_t id_sequence = 0;
-	uint16_t new_id;
+	static uint32_t id_sequence = (1 << 16);
+	uint32_t new_id;
 
 	if ((job_ptr == NULL) || 
 	    (job_ptr->magic != JOB_MAGIC)) 
 		fatal ("set_job_id: invalid job_ptr");
 	if ((job_ptr->partition == NULL) || (strlen(job_ptr->partition) == 0))
 		fatal ("set_job_id: partition not set");
+
+/* Include below code only if fear of rolling over 32 bit job IDs */
+#ifdef HUGE_JOB_ID
 	while (1) {
 		new_id = id_sequence++;
 		if (find_job_record(new_id) == NULL) {
 			job_ptr->job_id = new_id;
-			max_job_id = new_id;
 			break;
 		}
 	}
+#else
+	job_ptr->job_id = id_sequence++;
+#endif
 }
 
 
@@ -1143,13 +1196,13 @@ set_job_prio (struct job_record *job_ptr)
  * NOTE: only the job's priority and time_limt may be changed
  */
 int 
-update_job (uint16_t job_id, char *spec) 
+update_job (uint32_t job_id, char *spec) 
 {
 	int bad_index, error_code, i, time_limit;
 	int prio;
 	struct job_record *job_ptr;
 
-	job_ptr = list_find_first (job_list, &list_find_job_id, &job_id);
+	job_ptr = find_job_record (job_id);
 	if (job_ptr == NULL) {
 		error ("update_job: job_id %u does not exist.", job_id);
 		return ENOENT;
diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c
index eb4ee43b320..d2659cdb7b1 100644
--- a/src/slurmctld/node_scheduler.c
+++ b/src/slurmctld/node_scheduler.c
@@ -43,7 +43,7 @@ int
 main (int argc, char *argv[]) 
 {
 	int error_code, error_count = 0, line_num, i;
-	uint16_t job_id;
+	uint32_t job_id;
 	FILE *command_file;
 	char in_line[BUF_SIZE], *node_list;
 	log_options_t opts = LOG_OPTS_STDERR_ONLY;
diff --git a/src/slurmctld/parse_spec.c b/src/slurmctld/parse_spec.c
index 7fe6329e2ed..b0fbd8a3368 100644
--- a/src/slurmctld/parse_spec.c
+++ b/src/slurmctld/parse_spec.c
@@ -39,6 +39,7 @@ slurm_parser (char *spec, ...)
 	va_list ap;
 	char *keyword, **str_ptr;
 	int error_code, *int_ptr, type;
+	long *long_ptr;
 	float *float_ptr;
 	
 	error_code = 0;
@@ -57,6 +58,10 @@ slurm_parser (char *spec, ...)
 			float_ptr = va_arg(ap, float *);
 			error_code = load_float(float_ptr, keyword, spec);
 			break;
+		case 'l':
+			long_ptr = va_arg(ap, long *);
+			error_code = load_long(long_ptr, keyword, spec);
+			break;
 		case 's':
 			str_ptr = va_arg(ap, char **);
 			error_code = load_string(str_ptr, keyword, spec);
@@ -158,6 +163,58 @@ load_integer (int *destination, char *keyword, char *in_line)
 }
 
 
+/*
+ * load_long - parse a string for a keyword, value pair  
+ * input: *destination - location into which result is stored
+ *        keyword - string to search for
+ *        in_line - string to search for keyword
+ * output: *destination - set to value, no change if value not found, 
+ *             set to 1 if keyword found without value, 
+ *             set to -1 if keyword followed by "UNLIMITED"
+ *         in_line - the keyword and value (if present) are overwritten by spaces
+ *         return value - 0 if no error, EINVAL if the value is not a number
+ * NOTE: in_line is overwritten, do not use a constant
+ */
+int 
+load_long (long *destination, char *keyword, char *in_line) 
+{
+	char scratch[BUF_SIZE];	/* scratch area for parsing the input line */
+	char *str_ptr1, *str_ptr2, *str_ptr3;
+	int i, str_len1, str_len2;
+
+	str_ptr1 = (char *) strstr (in_line, keyword);
+	if (str_ptr1 == NULL)
+		return 0;		/* keyword absent, *destination unchanged */
+	str_len1 = strlen (keyword);
+	/* bounded copy: the rest of in_line may be longer than scratch */
+	strncpy (scratch, str_ptr1 + str_len1, sizeof (scratch) - 1);
+	scratch[sizeof (scratch) - 1] = '\0';
+	if ((scratch[0] == '\0') || 
+	    (isspace ((unsigned char) scratch[0]))) {	/* keyword with no value set */
+		*destination = 1;
+		str_len2 = 0;
+	}
+	else {
+		/* strtok_r returns NULL if only separators follow the keyword */
+		str_ptr2 = (char *) strtok_r (scratch, SEPCHARS, &str_ptr3);
+		if (str_ptr2 && (strcmp (str_ptr2, "UNLIMITED") == 0))
+			*destination = -1L;
+		else if (str_ptr2 && (str_ptr2[0] >= '0') && (str_ptr2[0] <= '9'))
+			*destination = strtol (str_ptr2, (char **) NULL, 10);
+		else {
+			error ("load_long: bad value for keyword %s\n", keyword);
+			return EINVAL;
+		}
+		str_len2 = strlen (str_ptr2);
+	}
+
+	/* blank out the keyword and its value so later parsing skips them */
+	for (i = 0; i < (str_len1 + str_len2); i++)
+		str_ptr1[i] = ' ';
+	return 0;
+}
+
+
 /*
  * load_string - parse a string for a keyword, value pair  
  * input: *destination - location into which result is stored
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 918c3ca5f14..dccd517ecd6 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -120,12 +120,12 @@ extern time_t last_step_update;	/* time of last update to job steps */
 extern char *job_state_string[];
 
 /* Don't accept more jobs once there are MAX_JOB_COUNT in the system */
-/* Purge OK for jobs over MIN_JOB_AGE minues old (since completion) */
+/* Purge OK for jobs over MIN_JOB_AGE seconds old (since completion) */
 /* This should prevent exhausting memory */
 #define DETAILS_MAGIC 0xdea84e7
 #define JOB_MAGIC 0xf0b7392c
 #define MAX_JOB_COUNT 1000
-#define MIN_JOB_AGE 10
+#define MIN_JOB_AGE 600
 #define STEP_MAGIC 0xce593bc1
 
 extern int job_count;			/* number of jobs in the system */
@@ -151,7 +151,7 @@ struct job_details {
 };
 
 struct job_record {
-	uint16_t job_id;		/* job ID */
+	uint32_t job_id;		/* job ID */
 	uint32_t magic;			/* magic cookie to test data integrity */
 	char name[MAX_NAME_LEN];	/* name of the job */
 	char partition[MAX_NAME_LEN];	/* name of the partition */
@@ -310,7 +310,7 @@ extern void  delete_job_details (struct job_record *job_entry);
  * output: return 0 on success, errno otherwise
  * global: job_list - pointer to global job list
  */
-extern int delete_job_record (uint16_t job_id);
+extern int delete_job_record (uint32_t job_id);
 
 /* 
  * delete_node_record - delete record for node with specified name
@@ -335,7 +335,7 @@ extern int delete_part_record (char *name);
  * output: return 0 on success, errno otherwise
  * global: step_list - global step list
  */
-extern int delete_step_record (uint16_t job_id, uint16_t step_id);
+extern int delete_step_record (uint32_t job_id, uint16_t step_id);
 
 /* 
  * find_job_record - return a pointer to the job record with the given job_id
@@ -343,7 +343,7 @@ extern int delete_step_record (uint16_t job_id, uint16_t step_id);
  * output: pointer to the job's record, NULL on error
  * global: job_list - global job list pointer
  */
-extern struct job_record *find_job_record (uint16_t job_id);
+extern struct job_record *find_job_record (uint32_t job_id);
 
 /* 
  * find_node_record - find a record for node with specified name,
@@ -367,7 +367,7 @@ extern struct part_record *find_part_record (char *name);
  * output: pointer to the job step's record, NULL on error
  * global: step_list - global step list
  */
-extern struct step_record *find_step_record (uint16_t job_id, uint16_t step_id);
+extern struct step_record *find_step_record (uint32_t job_id, uint16_t step_id);
 
 /* 
  * init_job_conf - initialize the job configuration tables and values. 
@@ -434,7 +434,7 @@ extern int  is_key_valid (int key);
  *	default_part_loc - pointer to default partition 
  * NOTE: the calling program must xfree the memory pointed to by node_list
  */
-extern int job_allocate (char *job_specs, uint16_t *new_job_id, char **node_list);
+extern int job_allocate (char *job_specs, uint32_t *new_job_id, char **node_list);
 
 /* 
  * job_cancel - cancel the specified job
@@ -444,20 +444,23 @@ extern int job_allocate (char *job_specs, uint16_t *new_job_id, char **node_list
  * global: job_list - pointer global job list
  *	last_job_update - time of last job table update
  */
-extern int job_cancel (uint16_t job_id);
+extern int job_cancel (uint32_t job_id);
 
 /*
  * job_create - parse the suppied job specification and create job_records for it
  * input: job_specs - job specifications
  *	new_job_id - location for storing new job's id
+ *	job_rec_ptr - place to park pointer to the job (or NULL)
  * output: new_job_id - the job's ID
  *	returns 0 on success, EINVAL if specification is invalid
  *	allocate - if set, job allocation only (no script required)
+ *	job_rec_ptr - pointer to the job (if not passed a NULL)
  * globals: job_list - pointer to global job list 
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition 
  */
-extern int job_create (char *job_specs, uint16_t *new_job_id, int allocate);
+extern int job_create (char *job_specs, uint32_t *new_job_id, int allocate, 
+	    struct job_record **job_rec_ptr);
 
 /* job_lock - lock the job information */
 extern void job_lock ();
@@ -721,7 +724,7 @@ extern int parse_job_specs (char *job_specs, char **req_features, char **req_nod
 		 int *contiguous, int *req_cpus, int *req_nodes,
 		 int *min_cpus, int *min_memory, int *min_tmp_disk, int *key,
 		 int *shared, int *dist, char **script, int *time_limit, 
-		 int *procs_per_task, int *job_id, int *priority, 
+		 int *procs_per_task, long *job_id, int *priority, 
 		 int *user_id);
 
 /* part_lock - lock the partition information */
@@ -844,7 +847,7 @@ extern int slurm_parser (char *spec, ...);
  *	list_part - global list of partition info
  *	default_part_loc - pointer to default partition 
  */
-extern int step_create (char *step_specs, uint16_t *new_job_id, int allocate);
+extern int step_create (char *step_specs, uint32_t *new_job_id, int allocate);
 
 /* step_lock - lock the step information 
  * global: step_mutex - semaphore for the step table
@@ -865,7 +868,7 @@ extern void step_unlock ();
  * NOTE: the contents of spec are overwritten by white space
  * NOTE: only the job's priority and time_limt may be changed once queued
  */
-extern int update_job (uint16_t job_id, char *spec);
+extern int update_job (uint32_t job_id, char *spec);
 
 /* 
  * update_node - update the configuration data for one or more nodes
diff --git a/src/slurmctld/step_mgr.c b/src/slurmctld/step_mgr.c
index d55d76af5a4..5d327c58b1a 100644
--- a/src/slurmctld/step_mgr.c
+++ b/src/slurmctld/step_mgr.c
@@ -107,7 +107,7 @@ create_step_record (int *error_code)
  * global: step_list - global step list
  */
 int 
-delete_step_record (uint16_t job_id, uint16_t step_id) 
+delete_step_record (uint32_t job_id, uint16_t step_id) 
 {
 	ListIterator step_record_iterator;
 	struct step_record *step_record_point;
@@ -147,7 +147,7 @@ delete_step_record (uint16_t job_id, uint16_t step_id)
  * global: step_list - global step list
  */
 struct step_record *
-find_step_record(uint16_t job_id, uint16_t step_id) 
+find_step_record(uint32_t job_id, uint16_t step_id) 
 {
 	ListIterator step_record_iterator;
 	struct step_record *step_record_point;
@@ -279,9 +279,9 @@ pack_step (struct step_record *dump_step_ptr, void **buf_ptr, int *buf_len)
 	char node_inx_ptr[BUF_SIZE];
 
 	if (dump_step_ptr->job_ptr)
-		pack16 (dump_step_ptr->job_ptr->job_id, buf_ptr, buf_len);
+		pack32 (dump_step_ptr->job_ptr->job_id, buf_ptr, buf_len);
 	else
-		pack16 (0, buf_ptr, buf_len);
+		pack32 (0, buf_ptr, buf_len);
 
 	pack16  (dump_step_ptr->step_id, buf_ptr, buf_len);
 	pack16  (dump_step_ptr->dist, buf_ptr, buf_len);
@@ -318,7 +318,7 @@ pack_step (struct step_record *dump_step_ptr, void **buf_ptr, int *buf_len)
  * NOTE: the calling program must xfree the memory pointed to by new_job_id
  */
 int
-step_create (char *step_specs, uint16_t *new_job_id, int allocate)
+step_create (char *step_specs, uint32_t *new_job_id, int allocate)
 {
 	return EINVAL;
 }
-- 
GitLab