From 05f38e08bcf3ed063f9d9efd4221c82582bfecc8 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 10 Mar 2008 16:12:50 +0000
Subject: [PATCH] svn merge -r13515:13533
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.2

---
 NEWS                             |   5 +-
 slurm/slurm.h.in                 |  95 +++++++++++++--------------
 src/api/job_info.c               |   4 +-
 src/common/slurm_protocol_pack.c |   2 +-
 src/scontrol/update_job.c        |  10 +++
 src/slurmctld/job_mgr.c          |  31 +++++++--
 src/slurmctld/slurmctld.h        | 107 ++++++++++++++++---------------
 7 files changed, 146 insertions(+), 108 deletions(-)

diff --git a/NEWS b/NEWS
index 36b9c098ae1..a0f61ba81c8 100644
--- a/NEWS
+++ b/NEWS
@@ -6,6 +6,8 @@ documents those changes that are of interest to users and admins.
  -- Restructure the sbcast RPC to take advantage of larger buffers available
     in Slurm v1.3 RPCs.
  -- Fix several memory leaks.
+ -- In scontrol, show a job's Requeue value and permit changes to its
+    Requeue and Comment values.
 
 * Changes in SLURM 1.3.0-pre10
 ==============================
@@ -204,7 +206,8 @@ documents those changes that are of interest to users and admins.
     to control message timeout.
  -- Add threaded agent to manage a queue of Gold update requests for 
     performance reasons.
- -- Add slloc options --chdir and --get-user-env.
+ -- Add salloc options --chdir and --get-user-env (for Moab).
+ -- Modify scontrol update to support job comment changes.
 
 * Changes in SLURM 1.2.24
 =========================
diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in
index dedb2dfdcd8..8eee044c451 100644
--- a/slurm/slurm.h.in
+++ b/slurm/slurm.h.in
@@ -577,27 +577,62 @@ typedef struct job_descriptor {	/* For submit, allocate, and update requests */
 } job_desc_msg_t;
 
 typedef struct job_info {
-	uint32_t job_id;	/* job ID */
-	char *name;		/* name of the job */
-	uint16_t batch_flag;	/* 1 if batch: queued job with script */
-	uint32_t alloc_sid;	/* local sid making resource alloc */
+	char *account;		/* charge to specified account */
 	char    *alloc_node;	/* local node making resource alloc */
-	uint32_t user_id;	/* user the job runs as */
+	uint32_t alloc_sid;	/* local sid making resource alloc */
+	uint16_t batch_flag;	/* 1 if batch: queued job with script */
+	char *command;		/* command to be executed */
+	char *comment;		/* arbitrary comment (used by Moab scheduler) */
+	uint16_t contiguous;	/* 1 if job requires contiguous nodes */
+	uint16_t cpus_per_task;	/* number of processors required for each task */
+	char *dependency;	/* synchronize job execution with other jobs */
+	time_t end_time;	/* time of termination, actual or expected */
+	char *exc_nodes;	/* comma separated list of excluded nodes */
+	int *exc_node_inx;	/* excluded list index pairs into node_table:
+				 * start_range_1, end_range_1, 
+				 * start_range_2, .., -1  */
+	uint32_t exit_code;	/* exit code for job (status from wait call) */
+	char *features;		/* comma separated list of required features */
 	uint32_t group_id;	/* group job sumitted as */
+	uint32_t job_id;	/* job ID */
+	uint16_t job_min_cores;    /* minimum cores per processor, default=0 */
+	uint32_t job_min_memory;   /* minimum real memory per node, default=0 */
+	uint16_t job_min_procs;	   /* minimum processors per node, default=0 */
+	uint16_t job_min_sockets;  /* minimum sockets per node, default=0 */
+	uint16_t job_min_threads;  /* minimum threads per core, default=0 */
+	uint32_t job_min_tmp_disk; /* minimum tmp disk per node, default=0 */
 	uint16_t job_state;	/* state of the job, see enum job_states */
-	uint32_t time_limit;	/* maximum run time in minutes or INFINITE */
-	time_t submit_time;	/* time of job submission */
-	time_t start_time;	/* time execution begins, actual or expected */
-	time_t end_time;	/* time of termination, actual or expected */
-	time_t suspend_time;	/* time job last suspended or resumed */
-	time_t pre_sus_time;	/* time job ran prior to last suspend */
-	uint32_t priority;	/* relative priority of the job, 
-				 * 0=held, 1=required nodes DOWN/DRAINED */
+	char *licenses;		/* licenses required by the job */
+	char *name;		/* name of the job */
+	char *network;		/* network specification */
 	char *nodes;		/* list of nodes allocated to job */
 	int *node_inx;		/* list index pairs into node_table for *nodes:
 				 * start_range_1, end_range_1, 
 				 * start_range_2, .., -1  */
+	uint16_t ntasks_per_core;/* number of tasks to invoke on each core */
+	uint16_t ntasks_per_node;/* number of tasks to invoke on each node */
+	uint16_t ntasks_per_socket;/* number of tasks to invoke on each socket */
 	char *partition;	/* name of assigned partition */
+	time_t pre_sus_time;	/* time job ran prior to last suspend */
+	uint32_t priority;	/* relative priority of the job, 
+				 * 0=held, 1=required nodes DOWN/DRAINED */
+	char *req_nodes;	/* comma separated list of required nodes */
+	int *req_node_inx;	/* required list index pairs into node_table: 
+				 * start_range_1, end_range_1, 
+				 * start_range_2, .., -1  */
+	uint16_t requeue;	/* enable or disable job requeue option */
+	select_jobinfo_t select_jobinfo; /* opaque data type,
+				 * process using select_g_get_jobinfo() */
+	uint16_t shared;	/* 1 if job can share nodes with other jobs */
+	time_t start_time;	/* time execution begins, actual or expected */
+	uint16_t state_reason;	/* reason job still pending or failed, see
+				 * slurm.h:enum job_state_reason */
+	time_t submit_time;	/* time of job submission */
+	time_t suspend_time;	/* time job last suspended or resumed */
+	uint32_t time_limit;	/* maximum run time in minutes or INFINITE */
+	uint32_t user_id;	/* user the job runs as */
+	char *work_dir;		/* pathname of working directory */
+
 	uint16_t num_cpu_groups;/* elements in below cpu arrays */
 	uint32_t *cpus_per_node;/* cpus per node */
 	uint32_t *cpu_count_reps;/* how many nodes have same cpu count */
@@ -610,40 +645,6 @@ typedef struct job_info {
 	uint16_t max_cores;	/* maximum number of cores per cpu */
 	uint16_t min_threads;	/* minimum number of threads per core */
 	uint16_t max_threads;	/* maximum number of threads per core */
-	uint16_t shared;	/* 1 if job can share nodes with other jobs */
-	uint16_t contiguous;	/* 1 if job requires contiguous nodes */
-	uint16_t cpus_per_task;	/* number of processors required for each task */
-	uint16_t ntasks_per_node;/* number of tasks to invoke on each node */
-	uint16_t ntasks_per_socket;/* number of tasks to invoke on each socket */
-	uint16_t ntasks_per_core;/* number of tasks to invoke on each core */
-	/* job constraints: */
-	uint16_t job_min_procs;	   /* minimum processors per node, default=0 */
-	uint16_t job_min_sockets;  /* minimum sockets per node, default=0 */
-	uint16_t job_min_cores;    /* minimum cores per processor, default=0 */
-	uint16_t job_min_threads;  /* minimum threads per core, default=0 */
-	uint32_t job_min_memory;   /* minimum real memory per node, default=0 */
-	uint32_t job_min_tmp_disk; /* minimum tmp disk per node, default=0 */
-	char *req_nodes;	/* comma separated list of required nodes */
-	int *req_node_inx;	/* required list index pairs into node_table: 
-				 * start_range_1, end_range_1, 
-				 * start_range_2, .., -1  */
-	char *exc_nodes;	/* comma separated list of excluded nodes */
-	int *exc_node_inx;	/* excluded list index pairs into node_table:
-				 * start_range_1, end_range_1, 
-				 * start_range_2, .., -1  */
-	char *features;		/* comma separated list of required features */
-	char *dependency;	/* syncrhonize job execution with other jobs */
-	uint32_t exit_code;	/* exit code for job (status from wait call) */
-	char *account;		/* charge to specified account */
-	uint16_t state_reason;	/* reason job still pending or failed, see
-				 * slurm.h:enum job_state_reason */
-	char *network;		/* network specification */
-	char *comment;		/* arbitrary comment (used by Moab scheduler) */
-	char *work_dir;		/* pathname of working directory */
-	char *command;		/* command to be executed */
-	select_jobinfo_t select_jobinfo; /* opaque data type,
-			* process using select_g_get_jobinfo() */
-	char *licenses;		/* licenses required by the job */
 } job_info_t;
 
 typedef struct job_info_msg {
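
The job_info_t members are reordered alphabetically above, with the per-node
allocation arrays grouped at the end, and a new "requeue" field is exposed to
clients. A minimal sketch of reading the new field through the public query
API, assuming v1.3 headers and linking against libslurm, with error handling
trimmed to the essentials:

	#include <stdio.h>
	#include <slurm/slurm.h>

	int main(void)
	{
		job_info_msg_t *msg = NULL;

		/* fetch all job records from slurmctld */
		if (slurm_load_jobs((time_t) 0, &msg, (uint16_t) 0) != 0) {
			slurm_perror("slurm_load_jobs");
			return 1;
		}
		for (uint32_t i = 0; i < msg->record_count; i++) {
			job_info_t *job = &msg->job_array[i];
			printf("job %u requeue=%u\n",
			       job->job_id, job->requeue);
		}
		slurm_free_job_info_msg(msg);	/* release the RPC buffer */
		return 0;
	}
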
diff --git a/src/api/job_info.c b/src/api/job_info.c
index e8a7ffa53d7..2df156bc8ce 100644
--- a/src/api/job_info.c
+++ b/src/api/job_info.c
@@ -381,8 +381,8 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner )
 
 	/****** Line 11 ******/
 	snprintf(tmp_line, sizeof(tmp_line), 
-		"Dependency=%s Account=%s",
-		job_ptr->dependency, job_ptr->account);
+		"Dependency=%s Account=%s Requeue=%u",
+		job_ptr->dependency, job_ptr->account, job_ptr->requeue);
 	xstrcat(out, tmp_line);
 	if (one_liner)
 		xstrcat(out, " ");
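
With this change, line 11 of scontrol's "show job" output gains a Requeue
field; for a job with requeue enabled it would read roughly like the
following (Dependency and Account values illustrative):

	Dependency=(null) Account=physics Requeue=1
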
diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c
index e8b87494276..f1446dfcaea 100644
--- a/src/common/slurm_protocol_pack.c
+++ b/src/common/slurm_protocol_pack.c
@@ -2122,7 +2122,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer)
 
 	safe_unpack32(&job->num_nodes, buffer);
 	safe_unpack32(&job->max_nodes, buffer);
-
+	safe_unpack16(&job->requeue,   buffer);
 
 	/*** unpack pending job details ***/
 	safe_unpack16(&job->shared, buffer);
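
This safe_unpack16() must mirror, in order and width, the pack16() that
_pack_default_job_details() now emits in job_mgr.c below; the wire format is
defined entirely by the explicit pack calls, not by struct layout.
Schematically, the invariant both sides must preserve:

	/* slurmctld packer */
	pack32(detail_ptr->min_nodes, buffer);
	pack32(detail_ptr->max_nodes, buffer);
	pack16(detail_ptr->requeue,   buffer);	/* new field */

	/* client unpacker: same types, same order */
	safe_unpack32(&job->num_nodes, buffer);
	safe_unpack32(&job->max_nodes, buffer);
	safe_unpack16(&job->requeue,   buffer);	/* new field */
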
diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c
index 3813fdeb44b..540c216f617 100644
--- a/src/scontrol/update_job.c
+++ b/src/scontrol/update_job.c
@@ -209,6 +209,10 @@ scontrol_update_job (int argc, char *argv[])
 			job_msg.job_id = 
 				(uint32_t) strtol(&argv[i][6], 
 						 (char **) NULL, 10);
+		else if (strncasecmp(argv[i], "Comment=", 8) == 0) {
+			job_msg.comment = &argv[i][8];
+			update_cnt++;
+		}
 		else if (strncasecmp(argv[i], "TimeLimit=", 10) == 0) {
 			int time_limit = time_str2mins(&argv[i][10]);
 			if ((time_limit < 0) && (time_limit != INFINITE)) {
@@ -247,6 +251,12 @@ scontrol_update_job (int argc, char *argv[])
 						(char **) NULL, 10);
 			update_cnt++;
 		}
+		else if (strncasecmp(argv[i], "Requeue=", 8) == 0) {
+			job_msg.requeue = 
+				(uint16_t) strtol(&argv[i][8], 
+						(char **) NULL, 10);
+			update_cnt++;
+		}
 		else if ((strncasecmp(argv[i], "MinNodes=", 9) == 0) ||
 		         (strncasecmp(argv[i], "ReqNodes=", 9) == 0)) {
 			char *tmp;
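
Together the two new branches let both fields be changed from the command
line, e.g. (job id illustrative):

	scontrol update JobId=1234 Comment="requested by moab" Requeue=1

Note that the controller-side guard added in job_mgr.c below is
"if (job_specs->requeue)", so a requested value of zero is indistinguishable
from "not specified" and Requeue=0 is silently ignored.
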
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index cce759191f9..c969f049825 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -3098,6 +3098,7 @@ static void _pack_default_job_details(struct job_details *detail_ptr,
 
 		pack32(detail_ptr->min_nodes, buffer);
 		pack32(detail_ptr->max_nodes, buffer);
+		pack16(detail_ptr->requeue,   buffer);
 	} else {
 		packnull(buffer);
 		packnull(buffer);
@@ -3106,6 +3107,7 @@ static void _pack_default_job_details(struct job_details *detail_ptr,
 
 		pack32((uint32_t) 0, buffer);
 		pack32((uint32_t) 0, buffer);
+		pack16((uint16_t) 0, buffer);
 	}
 }
 
@@ -3785,6 +3787,9 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 				info("update_job: setting features to %s for "
 				     "job_id %u", job_specs->features, 
 				     job_specs->job_id);
+			} else {
+				info("update_job: cleared features for "
+				     "job_id %u", job_specs->job_id);
 			}
 		} else {
 			error("Attempt to change features for job %u",
@@ -3793,11 +3798,26 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 		}
 	}
 
+	if (job_specs->comment) {
+		xfree(job_ptr->comment);
+		job_ptr->comment = job_specs->comment;
+		job_specs->comment = NULL;	/* Nothing left to free */
+		info("update_job: setting comment to %s for job_id %u",
+		     job_ptr->comment, job_specs->job_id);
+	}
+
 	if (job_specs->name) {
 		xfree(job_ptr->name);
-		job_ptr->name = xstrdup(job_specs->name);
+		job_ptr->name = job_specs->name;
+		job_specs->name = NULL;		/* Nothing left to free */
 		info("update_job: setting name to %s for job_id %u",
-		     job_specs->name, job_specs->job_id);
+		     job_ptr->name, job_specs->job_id);
+	}
+
+	if (job_specs->requeue) {
+		detail_ptr->requeue = job_specs->requeue;
+		info("update_job: setting requeue to %u for job_id %u",
+		     job_specs->requeue, job_specs->job_id);
 	}
 
 	if (job_specs->partition) {
@@ -3881,10 +3901,13 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid)
 	if (job_specs->account) {
 		xfree(job_ptr->account);
 		if (job_specs->account[0] != '\0') {
-			job_ptr->account = job_specs->account ;
+			job_ptr->account = job_specs->account;
+			job_specs->account = NULL;  /* Nothing left to free */
 			info("update_job: setting account to %s for job_id %u",
 			     job_ptr->account, job_specs->job_id);
-			job_specs->account = NULL;
+		} else {
+			info("update_job: cleared account for job_id %u",
+			     job_specs->job_id);
 		}
 	}
 
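
Several of these hunks also switch from duplicating the incoming string with
xstrdup() to stealing the pointer and clearing the source. That saves an
allocation and makes ownership explicit: the RPC layer frees job_specs once
the update completes, so any pointer left behind there would be freed out
from under job_ptr. The idiom in isolation (xfree/xstrdup are SLURM's
allocator helpers):

	xfree(job_ptr->name);			/* release the old value */
	job_ptr->name = job_specs->name;	/* take ownership */
	job_specs->name = NULL;			/* nothing left to free */
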
diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h
index 82938e618e8..7be782183b1 100644
--- a/src/slurmctld/slurmctld.h
+++ b/src/slurmctld/slurmctld.h
@@ -337,73 +337,83 @@ struct job_details {
 };
 
 struct job_record {
-	uint32_t job_id;		/* job ID */
-	uint32_t magic;			/* magic cookie for data integrity */
-	char *name;			/* name of the job */
-	char *partition;		/* name of the partition */
-	struct part_record *part_ptr;	/* pointer to the partition record */
+	char *account;			/* account number to charge */
+	char    *alloc_node;		/* local node making resource alloc */
+	uint16_t alloc_resp_port;	/* RESPONSE_RESOURCE_ALLOCATION port */
+	uint32_t alloc_sid;		/* local sid making resource alloc */
 	uint16_t batch_flag;		/* 1 or 2 if batch job (with script),
 					 * 2 indicates retry mode (one retry) */
-	uint32_t user_id;		/* user the job runs as */
+	char *comment;			/* arbitrary comment */
+        uint16_t cr_enabled;            /* specify if Consumable Resources
+                                         * is enabled. Needed since CR deals
+                                         * with a finer granularity in its
+                                         * node/cpu scheduling (available cpus
+                                         * instead of available nodes) than the
+                                         * bluegene and the linear plugins:
+                                         * 0 if cr is NOT enabled,
+                                         * 1 if cr is enabled */
+	uint32_t db_index;              /* used only for database
+					   plugins */
+	struct job_details *details;	/* job details */
+	time_t end_time;		/* time of termination, 
+					 * actual or expected */
+	uint32_t exit_code;		/* exit code for job (status from 
+					 * wait call) */
 	uint32_t group_id;		/* group submitted under */
+	uint32_t job_id;		/* job ID */
+	struct job_record *job_next;	/* next entry with same hash index */
 	enum job_states job_state;	/* state of the job */
 	uint16_t kill_on_node_fail;	/* 1 if job should be killed on 
 					 * node failure */
 	uint16_t kill_on_step_done;	/* 1 if job should be killed when 
 					 * the job step completes, 2 if kill
 					 * in progress */
-	select_jobinfo_t select_jobinfo;/* opaque data */
+	char *licenses;			/* licenses required by the job */
+	uint16_t mail_type;		/* see MAIL_JOB_* in slurm.h */
+	char *mail_user;		/* user to get e-mail notification */
+	uint32_t magic;			/* magic cookie for data integrity */
+	char *name;			/* name of the job */
+	char *network;			/* network/switch requirement spec */
+	uint16_t next_step_id;		/* next step id to be used */
 	char *nodes;			/* list of nodes allocated to job */
+	slurm_addr *node_addr;		/* addresses of the nodes allocated to 
+					 * job */
 	bitstr_t *node_bitmap;		/* bitmap of nodes allocated to job */
+	uint32_t node_cnt;		/* count of nodes allocated to job */
 	char *nodes_completing;		/* nodes still in completing state
 					 * for this job, used to insure
 					 * epilog is not re-run for job */
 	uint32_t num_procs;		/* count of required processors */
-	uint32_t total_procs;		/* number of allocated processors, 
-					   for accounting */
-	uint32_t time_limit;		/* time_limit minutes or INFINITE,
-					 * NO_VAL implies partition max_time */
+	uint16_t other_port;		/* port for client communications */
+	char *partition;		/* name of the partition */
+	time_t pre_sus_time;		/* time job ran prior to last suspend */
+	uint32_t priority;		/* relative priority of the job,
+					 * zero == held (don't initiate) */
+	uint32_t requid;            	/* requester user ID */
+	char *resp_host;		/* host for srun communications */
+	select_jobinfo_t select_jobinfo;/* opaque data */
 	time_t start_time;		/* time execution begins, 
 					 * actual or expected */
-	time_t end_time;		/* time of termination, 
-					 * actual or expected */
+	uint16_t state_reason;		/* reason job still pending or failed,
+					 * see slurm.h:enum job_state_reason */
+	List step_list;			/* list of job's steps */
 	time_t suspend_time;		/* time job last suspended or resumed */
-	time_t pre_sus_time;		/* time job ran prior to last suspend */
-	time_t tot_sus_time;		/* total time in suspend state */
 	time_t time_last_active;	/* time of last job activity */
-	uint32_t priority;		/* relative priority of the job,
-					 * zero == held (don't initiate) */
-	struct job_details *details;	/* job details */
+	uint32_t time_limit;		/* time_limit minutes or INFINITE,
+					 * NO_VAL implies partition max_time */
+	time_t tot_sus_time;		/* total time in suspend state */
+	uint32_t total_procs;		/* number of allocated processors, 
+					   for accounting */
+	struct part_record *part_ptr;	/* pointer to the partition record */
+	uint32_t user_id;		/* user the job runs as */
+
+	/* Per node allocation details */
 	uint16_t num_cpu_groups;	/* record count in cpus_per_node and 
 					 * cpu_count_reps */
 	uint32_t *cpus_per_node;	/* array of cpus per node allocated */
 	uint32_t *cpu_count_reps;	/* array of consecutive nodes with 
 					 * same cpu count */
-	uint32_t alloc_sid;		/* local sid making resource alloc */
-	char    *alloc_node;		/* local node making resource alloc */
-	uint16_t next_step_id;		/* next step id to be used */
-	uint32_t node_cnt;		/* count of nodes allocated to job */
-	slurm_addr *node_addr;		/* addresses of the nodes allocated to 
-					 * job */
-	List step_list;			/* list of job's steps */
-	char *resp_host;		/* host for srun communications */
-	uint16_t alloc_resp_port;	/* RESPONSE_RESOURCE_ALLOCATION port */
-	uint16_t other_port;		/* port for client communications */
-	char *account;			/* account number to charge */
-	char *comment;			/* arbitrary comment */
-	char *network;			/* network/switch requirement spec */
-	struct job_record *job_next;	/* next entry with same hash index */
-        uint16_t cr_enabled;            /* specify if if Consumable
-                                         * Resources is
-                                         * enabled. Needed since CR
-                                         * deals with a finer
-                                         * granularity in its node/cpu
-                                         * scheduling (available cpus
-                                         * instead of available nodes)
-                                         * than the bluegene and the
-                                         * linear plugins 
-                                         * 0 if cr is NOT enabled, 
-                                         * 1 if cr is enabled */
+
         uint32_t alloc_lps_cnt;		/* number of hosts in alloc_lps
 					 * or 0 if alloc_lps is not needed
 					 * for the credentials */
@@ -411,16 +421,7 @@ struct job_record {
 					 * allocated for this job */
 	uint32_t *used_lps;		/* number of logical processors
 					 * already allocated to job steps */
-	char *licenses;			/* licenses required by the job */
-	uint16_t mail_type;		/* see MAIL_JOB_* in slurm.h */
-	char *mail_user;		/* user to get e-mail notification */
-	uint32_t requid;            	/* requester user ID */
-	uint32_t exit_code;		/* exit code for job (status from 
-					 * wait call) */
-	uint16_t state_reason;		/* reason job still pending or failed
-					 * see slurm.h:enum job_wait_reason */
-	uint32_t db_index;              /* used only for database
-					   plugins */
+
 };
 
 /* Job dependency specification, used in "depend_list" within job_record */
-- 
GitLab