From 16eb8b148360d5675f965d8b1d1a6a84d3e92843 Mon Sep 17 00:00:00 2001
From: Nate Rini <nate@schedmd.com>
Date: Tue, 27 Aug 2019 13:40:32 -0600
Subject: [PATCH] Honor ntasks_per_node in _compute_c_b_task_dist()

Add _at_tpn_limit() as a helper to determine when a given node has reached
the tasks_per_node limit and to log when that happens.

Bug 7629.
---
 NEWS                                      |  1 +
 src/plugins/select/cons_tres/dist_tasks.c | 31 +++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/NEWS b/NEWS
index b0e80b95b5b..47c18c52b6b 100644
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,7 @@ documents those changes that are of interest to users and administrators.
     in SchedulerParameters by decoupling defer from too fragmented logic.
  -- Fix printf format string error on FreeBSD.
  -- Fix parsing of delay_boot in controller when additional arguments follow it.
+ -- Fix --ntasks-per-node in cons_tres.
 
 * Changes in Slurm 19.05.4
 ==========================
diff --git a/src/plugins/select/cons_tres/dist_tasks.c b/src/plugins/select/cons_tres/dist_tasks.c
index f492bdf7995..f3ff22a1a6a 100644
--- a/src/plugins/select/cons_tres/dist_tasks.c
+++ b/src/plugins/select/cons_tres/dist_tasks.c
@@ -576,6 +576,29 @@ static bool _tres_tasks_avail(uint32_t *gres_task_limit,
 	return false;
 }
 
+/* True if node n is at (or over) ntasks_per_node; a limit of 0 means none */
+static bool _at_tpn_limit(const uint32_t n, const struct job_record *job_ptr,
+			  const char *tag, bool log_error)
+{
+	const job_resources_t *job_res = job_ptr->job_resrcs;
+	const log_level_t log_lvl = log_error ? LOG_LEVEL_ERROR :
+						LOG_LEVEL_INFO;
+	/* tag is only used as a label in the log message below */
+	if (job_ptr->details->ntasks_per_node == 0)
+		return false; /* no per-node task limit set */
+
+	if (job_res->tasks_per_node[n] < job_ptr->details->ntasks_per_node)
+		return false; /* node n is still below the limit */
+	/* At limit: log if log_error or DEBUG_FLAG_SELECT_TYPE is set */
+	if (log_error || (select_debug_flags & DEBUG_FLAG_SELECT_TYPE))
+		log_var(log_lvl,
+			"%s: %s over tasks_per_node for %pJ node:%u task_per_node:%d max:%" PRIu16,
+			__func__, tag, job_ptr, n, job_res->tasks_per_node[n],
+			job_ptr->details->ntasks_per_node);
+
+	return true;
+}
+
 /*
  * _compute_task_c_b_task_dist - compute the number of tasks on each
  * of the node for the cyclic and block distribution. We need to do
@@ -693,6 +716,8 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr,
 				break;
 			if (!_tres_tasks_avail(gres_task_limit, job_res, n))
 				break;
+			if (_at_tpn_limit(n, job_ptr, "fill allocated", false))
+				break;
 			tid++;
 			job_res->tasks_per_node[n]++;
 			for (l = 0; l < job_ptr->details->cpus_per_task; l++) {
@@ -737,6 +762,9 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr,
 				if (!_tres_tasks_avail(gres_task_limit,
 						       job_res, n))
 					break;
+				if (_at_tpn_limit(n, job_ptr, "fill additional",
+						  false))
+					break;
 				tid++;
 				job_res->tasks_per_node[n]++;
 				for (l = 0; l < job_ptr->details->cpus_per_task;
@@ -767,6 +795,9 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr,
 			if (test_tres_tasks &&
 			    !_tres_tasks_avail(gres_task_limit, job_res, n))
 				continue;
+			if (_at_tpn_limit(n, job_ptr, "fill non-dedicated CPUs",
+					  true))
+				continue;
 			more_tres_tasks = true;
 			tid++;
 			job_res->tasks_per_node[n]++;
-- 
GitLab