From 99cd7e74ed84cb1b5794fdfd93e2d24e15c59ae0 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Mon, 6 Oct 2008 23:26:12 +0000
Subject: [PATCH] svn merge -r15285:15315
 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3

---
 NEWS                                          | 10 +++++
 doc/man/man1/sbatch.1                         |  3 ++
 doc/man/man1/srun.1                           |  5 ++-
 doc/man/man5/slurm.conf.5                     | 29 +++++++++++----
 src/common/read_config.c                      |  1 +
 src/common/slurm_protocol_defs.c              |  3 ++
 src/plugins/jobcomp/filetxt/jobcomp_filetxt.c | 37 +++++++++++++++++--
 src/sbatch/opt.c                              |  2 +
 src/slurmctld/node_mgr.c                      |  9 +++--
 src/slurmctld/proc_req.c                      |  9 +++--
 src/srun/opt.c                                |  2 +
 11 files changed, 90 insertions(+), 20 deletions(-)

diff --git a/NEWS b/NEWS
index 86db691a896..b28b7dc93dc 100644
--- a/NEWS
+++ b/NEWS
@@ -107,6 +107,16 @@ documents those changes that are of interest to users and admins.
  -- Fix bug in logic to remove whitespace from plugstack.conf.
  -- Add new configuration parameter SallocDefaultCommand to control what 
     shell that salloc launches by default.
+ -- When enforcing PrivateData configuration parameter, failures return 
+    "Access/permission denied" rather than "Invalid user id".
+ -- From sbatch and srun, if the --dependency option is specified then set 
+    the environment variable SLURM_JOB_DEPENDENCY to the same value.
+ -- In plugin jobcomp/filetxt, use ISO8601 formats for time by default (e.g. 
+    YYYY-MM-DDTHH:MM:SS rather than MM/DD-HH:MM:SS). This restores the default
+    behavior from Slurm version 1.2. Change the value of USE_ISO8601 in
+    src/plugins/jobcomp/filetxt/jobcomp_filetxt.c to revert the behavior.
+ -- Add support for configuration option of ReturnToService=2, which will 
+    return a DOWN node to use if the node was previously set DOWN for any reason.
 
 * Changes in SLURM 1.3.8
 ========================
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index 838ac2bfd6c..e8283f90c6f 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -783,6 +783,9 @@ The select/cons_res plugin allocates individual processors
 to jobs, so this number indicates the number of processors
 on this node allocated to the job.
 .TP
+\fBSLURM_JOB_DEPENDENCY\fR
+Set to value of the \-\-dependency option.
+.TP
 \fBSLURM_JOB_NAME\fR
 Name of the job.
 .TP
diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1
index cb709e6200e..cb967334742 100644
--- a/doc/man/man1/srun.1
+++ b/doc/man/man1/srun.1
@@ -1310,10 +1310,13 @@ on this node allocated to the job.
 \fBSLURM_GTIDS\fR
 Global task IDs running on this node.
 Zero origin and comma separated.
-
+.TP
+\fBSLURM_JOB_DEPENDENCY\fR
+Set to value of the \-\-dependency option.
 .TP
 \fBSLURM_JOBID\fR
 Job id of the executing job
+
 .TP
 \fBSLURM_LAUNCH_NODE_IPADDR\fR
 IP address of the node from which the task launch was 
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index 6cd295e2023..ca4df80e56e 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -843,14 +843,27 @@ Related configuration options include \fBResumeProgram\fR, \fBSuspendRate\fR,
 
 .TP
 \fBReturnToService\fR
-If set to 1, then a non\-responding (DOWN) node will become available 
-for use upon registration. Note that DOWN node's state will be changed 
-only if it was set DOWN due to being non\-responsive. If the node was 
-set DOWN for any other reason (low memory, prolog failure, epilog 
-failure, etc.), its state will not automatically be changed.  The 
-default value is 0, which means that a node will remain in the 
-DOWN state until a system administrator explicitly changes its state
-(even if the slurmd daemon registers and resumes communications).
+Controls when a DOWN node will be returned to service. 
+The default value is 0.
+Supported values include
+.RS
+.TP 4
+\fB0\fR
+A node will remain in the DOWN state until a system administrator
+explicitly changes its state (even if the slurmd daemon registers
+and resumes communications).
+.TP
+\fB1\fR
+A non\-responding (DOWN) node will become available for use upon
+registration. Note that a DOWN node's state will be changed only if
+it was set DOWN due to being non\-responsive. If the node was
+set DOWN for any other reason (low memory, prolog failure, epilog
+failure, etc.), its state will not automatically be changed.  
+.TP
+\fB2\fR
+A DOWN node will become available for use upon registration with a
+valid configuration.  The node could have been set DOWN for any reason. 
+.RE
 
 .TP
 \fBSallocDefaultCommand\fR
diff --git a/src/common/read_config.c b/src/common/read_config.c
index 492b8ba517a..40ac4b37888 100644
--- a/src/common/read_config.c
+++ b/src/common/read_config.c
@@ -1271,6 +1271,7 @@ init_slurm_conf (slurm_ctl_conf_t *ctl_conf_ptr)
 	xfree (ctl_conf_ptr->resume_program);
 	ctl_conf_ptr->resume_rate		= (uint16_t) NO_VAL;
 	ctl_conf_ptr->ret2service		= (uint16_t) NO_VAL;
+	xfree( ctl_conf_ptr->salloc_default_command);
 	xfree( ctl_conf_ptr->sched_params );
 	ctl_conf_ptr->sched_time_slice		= (uint16_t) NO_VAL;
 	xfree( ctl_conf_ptr->schedtype );
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index fc37eacce36..bac3161262c 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -755,6 +755,9 @@ private_data_string(uint16_t private_data, char *str, int str_len)
 		strcat(str, "accounts"); //9 len
 	}
 	// total len 42
+
+	if (str[0] == '\0')
+		strcat(str, "none");
 }
 
 char *job_state_string(enum job_states inx)
diff --git a/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c b/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c
index 77b558162e3..6180f24d1a9 100644
--- a/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c
+++ b/src/plugins/jobcomp/filetxt/jobcomp_filetxt.c
@@ -56,6 +56,8 @@
 #include "src/common/uid.h"
 #include "filetxt_jobcomp_process.h"
 
+#define USE_ISO8601 1
+
 /*
  * These variables are required by the generic plugin interface.  If they
  * are not found in the plugin, the plugin loader will ignore it.
@@ -208,6 +210,36 @@ extern int slurm_jobcomp_set_location ( char * location )
 	return rc;
 }
 
+/* Local variant of slurm_make_time_str() (see src/common/parse_time.h).
+ * Writes "Unknown" when *time is 0; otherwise ISO8601 if USE_ISO8601 is set. */
+static void _make_time_str (time_t *time, char *string, int size)
+{
+	struct tm time_tm;
+
+	localtime_r(time, &time_tm);
+	if ( *time == (time_t) 0 ) {
+		snprintf(string, size, "Unknown");
+	} else {
+#if USE_ISO8601
+		/* Format YYYY-MM-DDTHH:MM:SS, ISO8601 standard format,
+		 * NOTE: This is expected to break Maui, Moab and LSF
+		 * schedulers management of SLURM. */
+		snprintf(string, size,
+			"%4.4u-%2.2u-%2.2uT%2.2u:%2.2u:%2.2u",
+			(time_tm.tm_year + 1900), (time_tm.tm_mon+1), 
+			time_tm.tm_mday, time_tm.tm_hour, time_tm.tm_min, 
+			time_tm.tm_sec);
+#else
+		/* Format MM/DD-HH:MM:SS */
+		snprintf(string, size,
+			"%2.2u/%2.2u-%2.2u:%2.2u:%2.2u",
+			(time_tm.tm_mon+1), time_tm.tm_mday,
+			time_tm.tm_hour, time_tm.tm_min, time_tm.tm_sec);
+
+#endif
+	}
+}
+
 extern int slurm_jobcomp_log_record ( struct job_record *job_ptr )
 {
 	int rc = SLURM_SUCCESS;
@@ -236,9 +268,8 @@ extern int slurm_jobcomp_log_record ( struct job_record *job_ptr )
 	 * JOB_FAILED, JOB_TIMEOUT, etc. */
 	job_state = job_ptr->job_state & (~JOB_COMPLETING);
 
-	slurm_make_time_str(&(job_ptr->start_time),
-			    start_str, sizeof(start_str));
-	slurm_make_time_str(&(job_ptr->end_time), end_str, sizeof(end_str));
+	_make_time_str(&(job_ptr->start_time), start_str, sizeof(start_str));
+	_make_time_str(&(job_ptr->end_time), end_str, sizeof(end_str));
 
 	select_g_sprint_jobinfo(job_ptr->select_jobinfo,
 		select_buf, sizeof(select_buf), SELECT_PRINT_MIXED);
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 6fa693ccbe9..37355194653 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -1893,6 +1893,8 @@ static bool _opt_verify(void)
 		error( "--propagate=%s is not valid.", opt.propagate );
 		verified = false;
 	}
+	if (opt.dependency)
+		setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency);
 
 	if (opt.acctg_freq >= 0)
 		setenvf(NULL, "SLURM_ACCTG_FREQ", "%d", opt.acctg_freq); 
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index 5864b3ae378..2b81c4a87e7 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1631,10 +1631,11 @@ extern int validate_node_specs(slurm_node_registration_status_msg_t *reg_msg)
 						      slurmctld_cluster_name,
 						      node_ptr, now);
 		} else if ((base_state == NODE_STATE_DOWN) &&
-		           (slurmctld_conf.ret2service == 1) &&
-			   (node_ptr->reason != NULL) && 
-			   (strncmp(node_ptr->reason, "Not responding", 14) 
-					== 0)) {
+			   ((slurmctld_conf.ret2service == 2) ||
+		            ((slurmctld_conf.ret2service == 1) &&
+			     (node_ptr->reason != NULL) && 
+			     (strncmp(node_ptr->reason, "Not responding", 14) 
+					== 0)))) {
 			last_node_update = time (NULL);
 			if (reg_msg->job_count) {
 				node_ptr->node_state = NODE_STATE_ALLOCATED |
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index fb0f842bc29..298f2a6b556 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -434,7 +434,8 @@ void _fill_ctld_conf(slurm_ctl_conf_t * conf_ptr)
 	conf_ptr->resume_rate         = conf->resume_rate;
 	conf_ptr->ret2service         = conf->ret2service;
 
-	conf_ptr->salloc_default_command = xstrdup(conf->salloc_default_command);
+	conf_ptr->salloc_default_command = xstrdup(conf->
+						   salloc_default_command);
 	if (conf->sched_params)
 		conf_ptr->sched_params = xstrdup(conf->sched_params);
 	else
@@ -871,7 +872,7 @@ static void _slurm_rpc_dump_nodes(slurm_msg_t * msg)
 	&&  (!validate_super_user(uid))) {
 		unlock_slurmctld(node_read_lock);
 		error("Security violation, REQUEST_NODE_INFO RPC from uid=%d", uid);
-		slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING);
+		slurm_send_rc_msg(msg, ESLURM_ACCESS_DENIED);
 	} else if ((node_req_msg->last_update - 1) >= last_node_update) {
 		unlock_slurmctld(node_read_lock);
 		debug2("_slurm_rpc_dump_nodes, no change");
@@ -921,7 +922,7 @@ static void _slurm_rpc_dump_partitions(slurm_msg_t * msg)
 	&&  (!validate_super_user(uid))) {
 		unlock_slurmctld(part_read_lock);
 		debug2("Security violation, PARTITION_INFO RPC from uid=%d", uid);
-		slurm_send_rc_msg(msg, ESLURM_USER_ID_MISSING);
+		slurm_send_rc_msg(msg, ESLURM_ACCESS_DENIED);
 	} else if ((part_req_msg->last_update - 1) >= last_part_update) {
 		unlock_slurmctld(part_read_lock);
 		debug2("_slurm_rpc_dump_partitions, no change");
@@ -2335,7 +2336,7 @@ static void  _slurm_rpc_node_select_info(slurm_msg_t * msg)
 	lock_slurmctld(config_read_lock);
 	if ((slurmctld_conf.private_data & PRIVATE_DATA_NODES)
 	&&  (!validate_super_user(uid))) {
-		error_code = ESLURM_USER_ID_MISSING;
+		error_code = ESLURM_ACCESS_DENIED;
 		error("Security violation, NODE_SELECT_INFO RPC from uid=u",
 			(unsigned int) uid);
 	} 
diff --git a/src/srun/opt.c b/src/srun/opt.c
index 4f567e29089..8c57987720b 100644
--- a/src/srun/opt.c
+++ b/src/srun/opt.c
@@ -1655,6 +1655,8 @@ static void _opt_args(int argc, char **argv)
 		setenv("SLURM_NETWORK", opt.network, 1);
 	}
 #endif
+	if (opt.dependency)
+		setenvfs("SLURM_JOB_DEPENDENCY=%s", opt.dependency);
 
 	if (opt.nodelist && (!opt.test_only)) {
 #ifdef HAVE_BG
-- 
GitLab