From 87d03dd8ccaccc6775155e9f9f23c439c05b1ac1 Mon Sep 17 00:00:00 2001
From: Moe Jette <jette1@llnl.gov>
Date: Tue, 14 Sep 2010 21:22:41 +0000
Subject: [PATCH] Add support for SLURM_CLUSTERS environment variable in the
 sbatch command.

---
 NEWS                       |  1 +
 doc/man/man1/sbatch.1      | 23 +++++++++++++----------
 src/sbatch/mult_cluster.c  | 20 +++++++++++---------
 src/sbatch/opt.c           |  8 +++++---
 src/sbatch/sbatch.c        |  6 +++---
 src/slurmctld/controller.c |  5 +++--
 6 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/NEWS b/NEWS
index 12e353ad20e..e4fc8550bd2 100644
--- a/NEWS
+++ b/NEWS
@@ -8,6 +8,7 @@ documents those changes that are of interest to users and admins.
     partitions and use lua metatables to reference the job and partition fields.
  -- Add support for several new trigger types: SlurmDBD failure/restart,
     Database failure/restart, Slurmctld failure/restart.
+ -- Add support for SLURM_CLUSTERS environment variable in the sbatch command.
 
 * Changes in SLURM 2.2.0.pre10
 ==============================
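
For context on the new variables: sbatch resolves them into cluster
records exactly as the existing -M/--clusters option does, through
slurmdb_get_info_cluster().  A minimal sketch of the idea, assuming a
hypothetical helper name (_clusters_from_env() is illustrative; List
and slurmdb_get_info_cluster() are the patch's own):

    #include <stdlib.h>     /* getenv() */

    /* Sketch only: resolve the cluster list from the environment the
     * same way -M/--clusters does from argv.  Since opt.c scans its
     * env_vars[] table in order, SLURM_CLUSTERS (the later entry)
     * effectively wins over SBATCH_CLUSTERS when both are set. */
    static List _clusters_from_env(void)
    {
            char *names = getenv("SLURM_CLUSTERS");

            if (!names)
                    names = getenv("SBATCH_CLUSTERS");
            if (!names)
                    return NULL;    /* neither variable is set */
            return slurmdb_get_info_cluster(names);
    }
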
diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1
index a90b979001c..8f098d8eb51 100644
--- a/doc/man/man1/sbatch.1
+++ b/doc/man/man1/sbatch.1
@@ -1,4 +1,4 @@
-.TH "sbatch" "1" "SLURM 2.2" "April 2010" "SLURM Commands"
+.TH "sbatch" "1" "SLURM 2.2" "September 2010" "SLURM Commands"
 
 .SH "NAME"
 sbatch \- Submit a batch script to SLURM.
@@ -126,15 +126,6 @@ The default value is the current working directory.
 Checkpoint files will be of the form "<job_id>.ckpt" for jobs
 and "<job_id>.<step_id>.ckpt" for job steps.
 
-.TP
-\fB\-\-clusters\fR=<\fIstring\fR>
-Clusters to issue commands to.  Multiple cluster names may be comma separated.
-The job will be submitted to the one cluster providing the earliest expected
-job initiation time. The default value is the current cluster.  A value of
-of '\fIall\fR' will query to run on all clusters.  Note the
-\fB\-\-export\fR option to control environment variables exported
-between clusters.
-
 .TP
 \fB\-\-comment\fR=<\fIstring\fR>
 An arbitrary comment.
@@ -502,6 +493,15 @@ License names can be followed by an asterisk and count
 Multiple license names should be comma separated (e.g.
 "\-\-licenses=foo*4,bar").
 
+.TP
+\fB\-M\fR, \fB\-\-clusters\fR=<\fIstring\fR>
+Clusters to issue commands to.  Multiple cluster names may be comma separated.
+The job will be submitted to the one cluster providing the earliest expected
+job initiation time. The default value is the current cluster.  A value
+of '\fIall\fR' will query to run on all clusters.  Note the
+\fB\-\-export\fR option to control environment variables exported
+between clusters.
+
 .TP
 \fB\-m\fR, \fB\-\-distribution\fR=
 <\fIblock\fR|\fIcyclic\fR|\fIarbitrary\fR|\fIplane=<options>\fR>
@@ -1070,6 +1070,9 @@ Same as \fB\-\-checkpoint\fR
 \fBSLURM_CHECKPOINT_DIR\fR
 Same as \fB\-\-checkpoint\-dir\fR
 .TP
+\fBSBATCH_CLUSTERS\fR or \fBSLURM_CLUSTERS\fR
+Same as \fB\-\-clusters\fR
+.TP
 \fBSBATCH_CONN_TYPE\fR
 Same as \fB\-\-conn\-type\fR
 .TP
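
A note on precedence: sbatch reads its environment before parsing argv
(that is why the 'M' handler in opt.c destroys any list already
present), so an explicit -M/--clusters always overrides the variable.
A toy caller's-eye illustration; the cluster names "alpha,beta" and
the script "job.sh" are placeholders:

    #include <stdlib.h>

    /* Toy example: SLURM_CLUSTERS in sbatch's environment behaves
     * like -M/--clusters on its command line. */
    int main(void)
    {
            setenv("SLURM_CLUSTERS", "alpha,beta", 1);
            return system("sbatch job.sh");
    }
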
diff --git a/src/sbatch/mult_cluster.c b/src/sbatch/mult_cluster.c
index 2b203317f0c..7e52f366398 100644
--- a/src/sbatch/mult_cluster.c
+++ b/src/sbatch/mult_cluster.c
@@ -108,7 +108,7 @@ local_cluster_rec_t *_job_will_run (job_desc_msg_t *req)
 			slurm_seterrno(rc);
 		break;
 	case RESPONSE_JOB_WILL_RUN:
-		if(working_cluster_rec->flags & CLUSTER_FLAG_BG)
+		if (working_cluster_rec->flags & CLUSTER_FLAG_BG)
 			type = "cnodes";
 		will_run_resp = (will_run_response_msg_t *) resp_msg.data;
 		slurm_make_time_str(&will_run_resp->start_time,
@@ -123,7 +123,7 @@ local_cluster_rec_t *_job_will_run (job_desc_msg_t *req)
 		if (will_run_resp->preemptee_job_id) {
 			local_cluster->preempt_cnt =
 				list_count(will_run_resp->preemptee_job_id);
-			if(opt.verbose >= LOG_LEVEL_DEBUG) {
+			if (opt.verbose >= LOG_LEVEL_DEBUG) {
 				ListIterator itr;
 				uint32_t *job_id_ptr;
 				char *job_list = NULL, *sep = "";
@@ -160,9 +160,9 @@ extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req)
 	List ret_list = NULL;
 
 	/* return if we only have 1 or fewer clusters here */
-	if(!opt.clusters || !list_count(opt.clusters)) {
+	if (!opt.clusters || !list_count(opt.clusters)) {
 		return rc;
-	} else if(list_count(opt.clusters) == 1) {
+	} else if (list_count(opt.clusters) == 1) {
 		working_cluster_rec = list_peek(opt.clusters);
 		return rc;
 	}
@@ -174,12 +174,14 @@ extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req)
 	}
 
 	ret_list = list_create(_destroy_local_cluster_rec);
+	if (ret_list == NULL)
+		fatal("list_create malloc failure");
 	itr = list_iterator_create(opt.clusters);
-	while((working_cluster_rec = list_next(itr))) {
-		if((local_cluster = _job_will_run(req))) {
-			if(!ret_list)
+	while ((working_cluster_rec = list_next(itr))) {
+		if ((local_cluster = _job_will_run(req))) {
+			if (!ret_list)
 				ret_list = list_create(
-					_destroy_local_cluster_rec);
+					   _destroy_local_cluster_rec);
 			list_append(ret_list, local_cluster);
 		} else
 			error("Problem with submit to cluster %s: %m",
@@ -190,7 +192,7 @@ extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req)
 	if (host_set)
 		req->alloc_node = NULL;
 
-	if(!ret_list) {
+	if (!ret_list) {
 		error("Can't run on any of the clusters given");
 		return SLURM_ERROR;
 	}
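
The hunks above are mostly whitespace cleanup; the selection itself,
which this patch does not modify, happens after the loop has collected
one local_cluster_rec_t per responsive cluster.  A rough sketch of
that step, assuming the record carries the cluster pointer and the
promised start time (the field names below are guesses, not the
file's real layout):

    /* Sketch: choose the will-run response with the earliest start
     * time and publish the winner through working_cluster_rec. */
    local_cluster_rec_t *best = NULL, *lc;
    ListIterator itr2 = list_iterator_create(ret_list);

    while ((lc = list_next(itr2))) {
            if (!best || (lc->start_time < best->start_time))
                    best = lc;
    }
    list_iterator_destroy(itr2);
    if (best)
            working_cluster_rec = best->cluster_rec;
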
diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c
index 9622febec05..6b725d8b97e 100644
--- a/src/sbatch/opt.c
+++ b/src/sbatch/opt.c
@@ -438,6 +438,8 @@ env_vars_t env_vars[] = {
   {"SBATCH_BLRTS_IMAGE",   OPT_STRING,     &opt.blrtsimage,    NULL          },
   {"SBATCH_CHECKPOINT",    OPT_STRING,     &opt.ckpt_interval_str, NULL      },
   {"SBATCH_CHECKPOINT_DIR",OPT_STRING,     &opt.ckpt_dir,      NULL          },
+  {"SBATCH_CLUSTERS",      OPT_STRING,     &opt.clusters,      NULL          },
+  {"SLURM_CLUSTERS",       OPT_STRING,     &opt.clusters,      NULL          },
   {"SBATCH_CNLOAD_IMAGE",  OPT_STRING,     &opt.linuximage,    NULL          },
   {"SBATCH_CONN_TYPE",     OPT_CONN_TYPE,  NULL,               NULL          },
   {"SBATCH_CPU_BIND",      OPT_CPU_BIND,   NULL,               NULL          },
@@ -1210,10 +1212,10 @@ static void _set_options(int argc, char **argv)
 			}
 			break;
 		case 'M':
-			if(opt.clusters)
+			if (opt.clusters)
 				list_destroy(opt.clusters);
-			if(!(opt.clusters =
-			     slurmdb_get_info_cluster(optarg))) {
+			if (!(opt.clusters =
+			      slurmdb_get_info_cluster(optarg))) {
 				error("'%s' invalid entry for --clusters",
 				      optarg);
 				exit(1);
diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c
index 94344b6865d..7f0d08dfb79 100644
--- a/src/sbatch/sbatch.c
+++ b/src/sbatch/sbatch.c
@@ -148,8 +148,8 @@ int main(int argc, char *argv[])
 	desc.script = (char *)script_body;
 
 	/* If can run on multiple clusters find the earliest run time
-	   and run it there */
-	if(sbatch_set_first_avail_cluster(&desc) != SLURM_SUCCESS)
+	 * and run it there */
+	if (sbatch_set_first_avail_cluster(&desc) != SLURM_SUCCESS)
 		exit(error_exit);
 
 	while (slurm_submit_batch_job(&desc, &resp) < 0) {
@@ -180,7 +180,7 @@ int main(int argc, char *argv[])
         }
 
 	printf("Submitted batch job %u", resp->job_id);
-	if(working_cluster_rec)
+	if (working_cluster_rec)
 		printf(" on cluster %s", working_cluster_rec->name);
 	printf("\n");
 
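
The design choice in this hunk is worth spelling out: the chosen
cluster travels through the working_cluster_rec global (a
slurmdb_cluster_rec_t pointer) rather than through an argument, which
is why slurm_submit_batch_job() takes no cluster parameter and the
final printf can still name the cluster.  A condensed sketch of that
contract (_report_submit() is a made-up name):

    #include <stdio.h>
    #include <stdint.h>
    /* slurmdb_cluster_rec_t comes from SLURM's slurmdb headers */

    extern slurmdb_cluster_rec_t *working_cluster_rec;

    /* Sketch: sbatch_set_first_avail_cluster() leaves the winning
     * cluster, if any, in the global; later code just consults it. */
    static void _report_submit(uint32_t job_id)
    {
            printf("Submitted batch job %u", job_id);
            if (working_cluster_rec)        /* set when -M chose one */
                    printf(" on cluster %s", working_cluster_rec->name);
            printf("\n");
    }
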
diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c
index c6f316e7900..4167d60ac2f 100644
--- a/src/slurmctld/controller.c
+++ b/src/slurmctld/controller.c
@@ -360,10 +360,11 @@ int main(int argc, char *argv[])
 	assoc_init_arg.remove_assoc_notify = _remove_assoc;
 	assoc_init_arg.remove_qos_notify = _remove_qos;
 	assoc_init_arg.cache_level = ASSOC_MGR_CACHE_ASSOC |
-		ASSOC_MGR_CACHE_USER | ASSOC_MGR_CACHE_QOS;
+				     ASSOC_MGR_CACHE_USER  |
+				     ASSOC_MGR_CACHE_QOS;
 
 	if (assoc_mgr_init(acct_db_conn, &assoc_init_arg)) {
-		if(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
+		if (accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)
 			error("Association database appears down, "
 			      "reading from state file.");
 		else
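
The controller.c hunk is unrelated to sbatch; it only reflows the
cache-level initializer.  The three ASSOC_MGR_CACHE_* values are
independent bit flags ORed into one word and tested later with a
bitwise AND, just as accounting_enforce is tested a few lines below.
A trivial illustration; the names and numeric values are stand-ins,
not SLURM's real ones:

    /* Illustration only: independent bit flags compose with | and
     * are tested with &. */
    #define CACHE_ASSOC 0x0001
    #define CACHE_QOS   0x0002
    #define CACHE_USER  0x0004

    int cache_level = CACHE_ASSOC | CACHE_USER | CACHE_QOS;

    if (cache_level & CACHE_QOS) {
            /* QOS records will be cached */
    }
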
-- 
GitLab