From aa0b388e2a57314ea5ac032863481cedbf3be7eb Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Thu, 10 Apr 2014 10:18:41 -0700
Subject: [PATCH] Parse gres:type spec in slurm.conf

---
 doc/html/gres.shtml       | 21 ++++++++++++++-------
 doc/man/man5/slurm.conf.5 | 11 ++++++-----
 src/common/gres.c         | 31 +++++++++++++++++++++++--------
 3 files changed, 43 insertions(+), 20 deletions(-)

diff --git a/doc/html/gres.shtml b/doc/html/gres.shtml
index 8436b9fdd0b..ef3a6f8ead1 100644
--- a/doc/html/gres.shtml
+++ b/doc/html/gres.shtml
@@ -69,15 +69,20 @@ the option must be specified on all nodes and SLURM will track the assignment
 of each specific resource on each node. Otherwise SLURM will only track a
 count of allocated resources rather than the state of each individual device
 file.</LI>
+
+<LI><B>Type</B> Optionally specify the device type. For example, this might
+be used to identify a specific model of GPU, which users can then specify
+in their job request.
+NOTE: This is a new capability added in Slurm version 14.11.</LI>
 </UL>
 
 <P>Sample gres.conf file:</P>
 <PRE>
 # Configure support for our four GPUs
-Name=gpu File=/dev/nvidia0 CPUs=0,1
-Name=gpu File=/dev/nvidia1 CPUs=0,1
-Name=gpu File=/dev/nvidia2 CPUs=2,3
-Name=gpu File=/dev/nvidia3 CPUs=2,3
+Name=gpu Type=tesla  File=/dev/nvidia0 CPUs=0,1
+Name=gpu Type=tesla  File=/dev/nvidia1 CPUs=0,1
+Name=gpu Type=kepler File=/dev/nvidia2 CPUs=2,3
+Name=gpu Type=kepler File=/dev/nvidia3 CPUs=2,3
 Name=bandwidth Count=20M
 </PRE>
 <!-------------------------------------------------------------------------->
@@ -88,11 +93,13 @@ requested at job submit time using the <I>--gres</I> option supported by
 the <I>salloc</I>, <I>sbatch</I> and <I>srun</I> commands. The option
 requires an argument specifying which generic resources are required and
 how many resources. The resource specification is of the form
-<I>name[:count]</I>. The <I>name</I> is the same name as
+<I>name[[:type]:count]</I>. The <I>name</I> is the same name as
 specified by the <I>GresTypes</I> and <I>Gres</I> configuration parameters.
+<I>type</I> identifies a specific type of that generic resource (e.g. a
+specific model of GPU).
 <I>count</I> specifies how many resources are required and has a default
 value of 1. For example:<BR> 
-<I>sbatch --gres=gpu:2 ...</I>.</P>
+<I>sbatch --gres=gpu:kepler:2 ...</I>.</P>
 
 <P>Jobs will be allocated specific generic resources as needed to satisfy
 the request. If the job is suspended, those resources do not become available
@@ -171,6 +178,6 @@ to a physical device</pre>
 explicitly defined in the offload pragmas.</P>
 <!-------------------------------------------------------------------------->
 
-<p style="text-align: center;">Last modified 25 October 2012</p>
+<p style="text-align: center;">Last modified 10 April 2014</p>
 
 </body></html>
diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5
index a80f8b7e9ec..2231816f19d 100644
--- a/doc/man/man5/slurm.conf.5
+++ b/doc/man/man5/slurm.conf.5
@@ -1,4 +1,4 @@
-.TH "slurm.conf" "5" "October 2013" "slurm.conf 14.03" "Slurm configuration file"
+.TH "slurm.conf" "5" "April 2014" "slurm.conf 14.11" "Slurm configuration file"
 
 .SH "NAME"
 slurm.conf \- Slurm configuration file
@@ -2973,13 +2973,14 @@ Also see \fBGres\fR.
 .TP
 \fBGres\fR
 A comma delimited list of generic resources specifications for a node.
-Each resource specification consists of a name followed by an optional
-colon with a numeric value (default value is one)
-(e.g. "Gres=bandwidth:10000,gpu:2").
+Each resource specification consists of a name followed by a colon with a
+numeric value (e.g. "Gres=bandwidth:10000,gpu:2").
 A suffix of "K", "M" or "G" may be used to multiply the number by 1024,
-1048576 or 1073741824 respectively (e.g. "Gres=bandwidth:4G,gpu:4")..
+1048576 or 1073741824 respectively (e.g. "Gres=bandwidth:4G,gpu:4").
 By default a node has no generic resources and its maximum count is
 4,294,967,295.
+The generic resource type can optionally be specified as a colon separated
+field between the name and the count (e.g. "Gres=gpu:tesla:1,gpu:kepler:1").
 Also see \fBFeature\fR.
 
 .TP
diff --git a/src/common/gres.c b/src/common/gres.c
index e6cf1146f22..56a73729b13 100644
--- a/src/common/gres.c
+++ b/src/common/gres.c
@@ -1215,11 +1215,21 @@ static void _gres_node_list_delete(void *list_element)
 	xfree(gres_ptr);
 }
 
+/*
+ * Compute the total GRES count for a particular gres_name.
+ * Note that a given gres_name can appear multiple times in the orig_config
+ * string for multiple types (e.g. "gres=gpu:kepler:1,gpu:tesla:2").
+ * IN orig_config - gres configuration from slurm.conf
+ * IN gres_name - name of the GRES (e.g. "gpu"), without any type suffix
+ * IN gres_name_colon - gres name with appended colon
+ * IN gres_name_colon_len - size of gres_name_colon
+ * RET - Total configured count for this GRES, summed over all of its types
+ */
 static uint32_t _get_gres_cnt(char *orig_config, char *gres_name,
 			      char *gres_name_colon, int gres_name_colon_len)
 {
-	char *node_gres_config, *tok, *last_num = NULL, *last_tok = NULL;
-	uint32_t gres_config_cnt = 0;
+	char *node_gres_config, *tok, *num, *last_num = NULL, *last_tok = NULL;
+	uint32_t gres_config_cnt = 0, tmp_gres_cnt = 0;
 
 	if (orig_config == NULL)
 		return gres_config_cnt;
@@ -1232,17 +1242,22 @@ static uint32_t _get_gres_cnt(char *orig_config, char *gres_name,
 			break;
 		}
 		if (!strncmp(tok, gres_name_colon, gres_name_colon_len)) {
-			tok += gres_name_colon_len;
-			gres_config_cnt = strtol(tok, &last_num, 10);
+			num = strrchr(tok, ':');
+			if (!num) {
+				error("Bad GRES configuration: %s", tok);
+				break;
+			}
+			num++;
+			tmp_gres_cnt = strtol(num, &last_num, 10);
 			if (last_num[0] == '\0')
 				;
 			else if ((last_num[0] == 'k') || (last_num[0] == 'K'))
-				gres_config_cnt *= 1024;
+				tmp_gres_cnt *= 1024;
 			else if ((last_num[0] == 'm') || (last_num[0] == 'M'))
-				gres_config_cnt *= (1024 * 1024);
+				tmp_gres_cnt *= (1024 * 1024);
 			else if ((last_num[0] == 'g') || (last_num[0] == 'G'))
-				gres_config_cnt *= (1024 * 1024 * 1024);
-			break;
+				tmp_gres_cnt *= (1024 * 1024 * 1024);
+			gres_config_cnt += tmp_gres_cnt;
 		}
 		tok = strtok_r(NULL, ",", &last_tok);
 	}
-- 
GitLab