From 2cde0e02b1363ec559b68c292214eb2aa5a3185a Mon Sep 17 00:00:00 2001
From: Morris Jette <jette@schedmd.com>
Date: Wed, 14 May 2014 16:32:11 -0700
Subject: [PATCH] task/cgroup - fix for specialized cores

task/cgroup - Correct specialized core task binding with user supplied
invalid CPU mask or map. Rather than generating an error and ignoring
user specification. Mask user supplied map against available CPUs or
bind to all available CPUs. In either case, log the invalid CPU map
or mask.
bug 782
---
 NEWS                                         |  2 +
 src/plugins/task/cgroup/task_cgroup_cpuset.c | 46 ++++++++++++++++++--
 2 files changed, 45 insertions(+), 3 deletions(-)

diff --git a/NEWS b/NEWS
index 4bb0c16a124..01fa871c3f4 100644
--- a/NEWS
+++ b/NEWS
@@ -49,6 +49,8 @@ documents those changes that are of interest to users and admins.
  -- Do not resume a job with specialized cores on a node running another job
     with specialized cores (only one can run at a time).
  -- Add specialized core count to job suspend/resume calls.
+ -- task/cgroup - Correct specialized core task binding with user supplied
+    invalid CPU mask or map.
 
 * Changes in Slurm 14.03.4
 ==========================
diff --git a/src/plugins/task/cgroup/task_cgroup_cpuset.c b/src/plugins/task/cgroup/task_cgroup_cpuset.c
index c26d9a35cf4..cf5e3fedaed 100644
--- a/src/plugins/task/cgroup/task_cgroup_cpuset.c
+++ b/src/plugins/task/cgroup/task_cgroup_cpuset.c
@@ -1084,6 +1084,43 @@ extern int task_cgroup_cpuset_attach_task(stepd_step_rec_t *job)
 	return fstatus;
 }
 
+/* The job has specialized cores, synchroize user mask with available cores */
+static bool _validate_mask(uint32_t task_id, hwloc_obj_t obj, cpu_set_t *ts)
+{
+	int i, j, overlaps = 0;
+	bool superset = true;
+
+	for (i = 0; i < CPU_SETSIZE; i++) {
+		if (!CPU_ISSET(i, ts))
+			continue;
+		j = hwloc_bitmap_isset(obj->allowed_cpuset, i);
+		if (j > 0) {
+			overlaps++;
+		} else if (j == 0) {
+			CPU_CLR(i, ts);
+			superset = false;
+		}
+	}
+
+	if (overlaps == 0) {
+		/* The task's cpu map is completely invalid.
+		 * Give it all allowed CPUs */
+		for (i = 0; i < CPU_SETSIZE; i++) {
+			if (hwloc_bitmap_isset(obj->allowed_cpuset, i) > 0)
+				CPU_SET(i, ts);
+		}
+	}
+
+	if (!superset) {
+		info("task/cgroup: Ignoring user CPU binding which overlaps "
+		     "specialized cores for task[%u]", task_id);
+		fprintf(stderr, "Requested cpu_bind option overlaps "
+			"specialized cores for task[%u]\n", task_id);
+	}
+
+	return superset;
+}
+
 /* affinity should be set using sched_setaffinity to not force */
 /* user to have to play with the cgroup hierarchy to modify it */
 extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
@@ -1107,7 +1144,7 @@ extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
 	hwloc_obj_type_t hwtype;
 	hwloc_obj_type_t req_hwtype;
 	int bind_verbose = 0;
-	int rc = SLURM_SUCCESS;
+	int rc = SLURM_SUCCESS, match;
 	pid_t    pid = job->envtp->task_pid;
 	size_t tssize;
 	uint32_t nldoms;
@@ -1251,8 +1288,9 @@ extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
 		 * Bind the taskid in accordance with the specified mode
 		 */
 		obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_MACHINE, 0);
-		if (!hwloc_bitmap_isequal(obj->complete_cpuset,
-				obj->allowed_cpuset)) {
+		match = hwloc_bitmap_isequal(obj->complete_cpuset,
+					     obj->allowed_cpuset);
+		if ((job->job_core_spec == 0) && !match) {
 			info("task/cgroup: entire node must be allocated, "
 			     "disabling affinity, task[%u]", taskid);
 			fprintf(stderr, "Requested cpu_bind option requires "
@@ -1267,6 +1305,8 @@ extern int task_cgroup_cpuset_set_task_affinity(stepd_step_rec_t *job)
 					  job);
 			tssize = sizeof(cpu_set_t);
 			fstatus = SLURM_SUCCESS;
+			if (job->job_core_spec)
+				_validate_mask(taskid, obj, &ts);
 			if ((rc = sched_setaffinity(pid, tssize, &ts))) {
 				error("task/cgroup: task[%u] unable to set "
 				      "mask 0x%s", taskid,
-- 
GitLab