From 60a9ec12e18f260cde569cc3e675cf2cb1688e4d Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Fri, 9 Jan 2015 15:44:17 -0800
Subject: [PATCH] Burst buffer enhancements

Remove GRES spec from burst_buffer.conf
  There is no logic in the generic plugin to support this and the
  cray plugin does not get the information from the config file.
Change allow/deny user separator from colon to comma
Confirm AllowUsers for swap and gres space
Make sure nodes are not shared on Cray systems
Minor changes in several other places
---
 doc/man/man5/burst_buffer.conf.5              | 37 ++++++++++---------
 src/common/slurm_errno.c                      |  2 +-
 .../burst_buffer/common/burst_buffer_common.c | 22 ++++++++---
 .../burst_buffer/cray/burst_buffer_cray.c     | 16 +++++---
 4 files changed, 47 insertions(+), 30 deletions(-)

diff --git a/doc/man/man5/burst_buffer.conf.5 b/doc/man/man5/burst_buffer.conf.5
index 66bc16b5f41..70231215608 100644
--- a/doc/man/man5/burst_buffer.conf.5
+++ b/doc/man/man5/burst_buffer.conf.5
@@ -54,17 +54,18 @@ Slurm distribution for an example.
 Granularity of job space allocations in units of gigabytes.
 The default value is 1 gigabyte.
 
-.TP
-\fBGres\fR
-Generic resources associated with burst buffers.
-This is a completely separate name space from the Gres defined in the slurm.conf
-file.
-The Gres value consistes of a comma separated list of generic resources,
-each of which includes a name separated by a colon and a numeric value.
-The numeric value can include a suffic of "k", "m" or "g", which multiplies
-the numeric value by 1,024, 1,048,576, or 1,073,741,824 respectively.
-The numeric value is a 32-bit value.
-See the example below.
+.\ Possible future enhancement
+.\ .TP
+.\ \fBGres\fR
+.\ Generic resources associated with burst buffers.
+.\ This is a completely separate name space from the Gres defined in the slurm.conf
+.\ file.
+.\ The Gres value consists of a comma separated list of generic resources,
+.\ each of which includes a name separated by a colon and a numeric value.
+.\ The numeric value can include a suffix of "k", "m" or "g", which multiplies
+.\ the numeric value by 1,024, 1,048,576, or 1,073,741,824 respectively.
+.\ The numeric value is a 32-bit value.
+.\ See the example below.
 
 .TP
 \fBJobSizeLimit\fR
@@ -155,19 +156,19 @@ By default there is no job allocation size limit.
 .br
 ##################################################################
 .br
-AllowUsers=alan:brenda
+AllowUsers=alan,brenda
 .br
 PrivateData=true
-.br
-Gres=nodes:10,other:20
+.\ .br
+.\ Gres=nodes:10,other:20
 .br
 #
 .br
 Granularity=1GB
 .br
-JobSizeLimit=20GB   # Applies to each job
+JobSizeLimit=200GB   # Applies to each job
 .br
-UserSizeLimit=50GB  # Applies to ALL users
+UserSizeLimit=500GB  # Applies to each user
 .br
 #
 .br
@@ -177,9 +178,9 @@ PrioBoostAlloc=200
 .br
 #
 .br
-StageInTimeout=30
+StageInTimeout=30    # Seconds
 .br
-StageOutTimeout=30
+StageOutTimeout=30   # Seconds
 .br
 #
 .br
diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c
index 21b71fd52d6..34c9dda31b3 100644
--- a/src/common/slurm_errno.c
+++ b/src/common/slurm_errno.c
@@ -310,7 +310,7 @@ static slurm_errtab_t slurm_errtab[] = {
 	  "BurstBufferType change requires restart of slurmctld daemon "
 	  "to take effect"},
 	{ ESLURM_BURST_BUFFER_PERMISSION,
-	  "Burst Buffer permssion denied"			},
+	  "Burst Buffer permission denied"			},
 	{ ESLURM_BURST_BUFFER_LIMIT,
 	  "Burst Buffer resource limit exceeded"		},
 	{ ESLURM_INVALID_BURST_BUFFER_REQUEST,
diff --git a/src/plugins/burst_buffer/common/burst_buffer_common.c b/src/plugins/burst_buffer/common/burst_buffer_common.c
index ffd3310d56e..63b5ef29ae1 100644
--- a/src/plugins/burst_buffer/common/burst_buffer_common.c
+++ b/src/plugins/burst_buffer/common/burst_buffer_common.c
@@ -67,7 +67,10 @@
 
 #include "burst_buffer_common.h"
 
-/* Translate colon delimitted list of users into a UID array,
+/* For possible future use by burst_buffer/generic */
+#define _SUPPORT_GRES 0
+
+/* Translate comma delimited list of users into a UID array,
  * Return value must be xfreed */
 static uid_t *_parse_users(char *buf)
 {
@@ -83,7 +86,7 @@ static uid_t *_parse_users(char *buf)
 		delim[0] = '\0';
 	array_size = 1;
 	user_array = xmalloc(sizeof(uid_t) * array_size);
-	tok = strtok_r(tmp, ":", &save_ptr);
+	tok = strtok_r(tmp, ",", &save_ptr);
 	while (tok) {
 		if ((uid_from_string(tok, user_array + inx) == -1) ||
 		    (user_array[inx] == 0)) {
@@ -95,7 +98,7 @@ static uid_t *_parse_users(char *buf)
 						      sizeof(uid_t)*array_size);
 			}
 		}
-		tok = strtok_r(NULL, ":", &save_ptr);
+		tok = strtok_r(NULL, ",", &save_ptr);
 	}
 	xfree(tmp);
 	return user_array;
@@ -116,7 +119,7 @@ static char *_print_users(uid_t *buf)
 		if (!user_elem)
 			continue;
 		if (user_str)
-			xstrcat(user_str, ":");
+			xstrcat(user_str, ",");
 		xstrcat(user_str, user_elem);
 		xfree(user_elem);
 	}
@@ -322,6 +325,7 @@ extern void bb_remove_user_load(bb_alloc_t *bb_ptr, bb_state_t *state_ptr)
 	}
 }
 
+#if _SUPPORT_GRES
 static uint32_t _atoi(char *tok)
 {
 	char *end_ptr = NULL;
@@ -341,20 +345,24 @@ static uint32_t _atoi(char *tok)
 	}
 	return size_u;
 }
+#endif
 
 /* Load and process configuration parameters */
 extern void bb_load_config(bb_state_t *state_ptr, char *type)
 {
 	s_p_hashtbl_t *bb_hashtbl = NULL;
-	char *bb_conf, *colon, *save_ptr, *tmp = NULL, *tok, *value;
+	char *bb_conf, *tmp = NULL, *value;
+#if _SUPPORT_GRES
+	char *colon, *save_ptr = NULL, *tok;
 	uint32_t gres_cnt;
+#endif
 	int fd, i;
 	static s_p_options_t bb_options[] = {
 		{"AllowUsers", S_P_STRING},
 		{"DenyUsers", S_P_STRING},
 		{"GetSysState", S_P_STRING},
 		{"Granularity", S_P_STRING},
-		{"Gres", S_P_STRING},
+/*		{"Gres", S_P_STRING},	*/
 		{"JobSizeLimit", S_P_STRING},
 		{"PrioBoostAlloc", S_P_UINT32},
 		{"PrioBoostUse", S_P_UINT32},
@@ -417,6 +425,7 @@ extern void bb_load_config(bb_state_t *state_ptr, char *type)
 			state_ptr->bb_config.granularity = 1;
 		}
 	}
+#if _SUPPORT_GRES
 	if (s_p_get_string(&tmp, "Gres", bb_hashtbl)) {
 		tok = strtok_r(tmp, ",", &save_ptr);
 		while (tok) {
@@ -441,6 +450,7 @@ extern void bb_load_config(bb_state_t *state_ptr, char *type)
 		}
 		xfree(tmp);
 	}
+#endif
 	if (s_p_get_string(&tmp, "JobSizeLimit", bb_hashtbl)) {
 		state_ptr->bb_config.job_size_limit = bb_get_size_num(tmp, 1);
 		xfree(tmp);
diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c
index 6569a0d94ff..1d65169087f 100644
--- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c
+++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c
@@ -1586,7 +1586,7 @@ extern int bb_p_load_state(bool init_config)
 {
 	pthread_mutex_lock(&bb_state.bb_mutex);
 	if (bb_state.bb_config.debug_flag)
-		info("%s: %s", plugin_type,  __func__);
+		debug("%s: %s", plugin_type,  __func__);
 	_load_state();
 	pthread_mutex_unlock(&bb_state.bb_mutex);
 
@@ -1652,6 +1652,7 @@ extern int bb_p_state_pack(uid_t uid, Buf buffer, uint16_t protocol_version)
 extern int bb_p_job_validate(struct job_descriptor *job_desc,
 			     uid_t submit_uid)
 {
+	bool have_gres = false, have_swap = false;
 	int32_t bb_size = 0;
 	char *key;
 	int i, rc;
@@ -1672,8 +1673,12 @@ extern int bb_p_job_validate(struct job_descriptor *job_desc,
 			bb_size = bb_get_size_num(key + 11,
 						bb_state.bb_config.granularity);
 		}
+		if (strstr(job_desc->burst_buffer, "SLURM_GRES="))
+			have_gres = true;
+		if (strstr(job_desc->burst_buffer, "SLURM_SWAP="))
+			have_swap = true;
 	}
-	if (bb_size == 0)
+	if ((bb_size == 0) && (have_gres == false) && (have_swap == false))
 		return SLURM_SUCCESS;
 	if (bb_size < 0)
 		return ESLURM_BURST_BUFFER_LIMIT;
@@ -1716,9 +1721,10 @@ extern int bb_p_job_validate(struct job_descriptor *job_desc,
 		     "but total space is only %u",
 		     job_desc->user_id, bb_size, bb_state.total_space);
 	}
-
 	pthread_mutex_unlock(&bb_state.bb_mutex);
 
+	job_desc->shared = 0;	/* Compute nodes can not be shared */
+
 	return SLURM_SUCCESS;
 }
 
@@ -1951,10 +1957,10 @@ extern int bb_p_job_test_stage_in(struct job_record *job_ptr, bool test_only)
 		     jobid2fmt(job_ptr, jobid_buf, sizeof(jobid_buf)),
 		     (int) test_only);
 	}
-	if (job_ptr->array_recs && (job_ptr->array_task_id == NO_VAL))
-		return -1;
 	if ((bb_spec = _get_bb_spec(job_ptr)) == NULL)
 		return rc;
+	if (job_ptr->array_recs && (job_ptr->array_task_id == NO_VAL))
+		return -1;
 	pthread_mutex_lock(&bb_state.bb_mutex);
 	bb_ptr = bb_find_job_rec(job_ptr, bb_state.bb_hash);
 	if (!bb_ptr) {
-- 
GitLab