Honor ntasks_per_node in _compute_c_b_task_dist()

Add _at_tpn_limit() as a helper to determine when a given node has reached the tasks_per_node limit and to log when that happens. Bug 7629.

Honor ntasks_per_node in _compute_c_b_task_dist()
Add _at_tpn_limit() as a helper to determine when a given node has reached the tasks_per_node limit and to log when that happens. Bug 7629.
16eb8b14 · Nate Rini · Brian Christiansen · 1c4e91d0 · 16eb8b14 · 16eb8b14
Commit 16eb8b14 authored 5 years ago by Nate Rini Committed by Brian Christiansen 5 years ago
--- a/NEWS
+++ b/NEWS
@@ -10,6 +10,7 @@ documents those changes that are of interest to users and administrators.
    in SchedulerParameters by decoupling defer from too fragmented logic.
 -- Fix printf format string error on FreeBSD.
 -- Fix parsing of delay_boot in controller when additional arguments follow it.
+ -- Fix --ntasks-per-node in cons_tres.
 * Changes in Slurm 19.05.4
 ==========================

--- a/src/plugins/select/cons_tres/dist_tasks.c
+++ b/src/plugins/select/cons_tres/dist_tasks.c
@@ -576,6 +576,29 @@ static bool _tres_tasks_avail(uint32_t *gres_task_limit,
 	return false;
 }
+/*
+ * _at_tpn_limit - test whether a node has reached the job's tasks-per-node
+ *	limit, optionally logging when it has.
+ *
+ * IN n         - index into job_ptr->job_resrcs->tasks_per_node[] of the node
+ *                to test
+ * IN job_ptr   - job whose details->ntasks_per_node supplies the limit and
+ *                whose job_resrcs supplies the current per-node task counts
+ * IN tag       - short text inserted into the log message so the caller's
+ *                phase can be identified
+ * IN log_error - if true, always log and do so at LOG_LEVEL_ERROR; if false,
+ *                log at LOG_LEVEL_INFO and only when DEBUG_FLAG_SELECT_TYPE
+ *                is set in select_debug_flags
+ *
+ * RET false if details->ntasks_per_node is 0 (presumably meaning no limit
+ *     was requested - confirm against the job submit path) or if the node's
+ *     task count is still below the limit; true otherwise.
+ *
+ * NOTE: the log text says "over", but the test also fires when the count is
+ * exactly equal to the limit, i.e. when no further task may be placed.
+ * NOTE(review): job_ptr->details and job_ptr->job_resrcs are dereferenced
+ * without NULL checks; callers are assumed to guarantee both are set.
+ */
+static bool _at_tpn_limit(const uint32_t n, const struct job_record *job_ptr,
+			  const char *tag, bool log_error)
+{
+	const job_resources_t *job_res = job_ptr->job_resrcs;
+	const log_level_t log_lvl = log_error ? LOG_LEVEL_ERROR :
+						LOG_LEVEL_INFO;
+	if (job_ptr->details->ntasks_per_node == 0)
+		return false;
+	if (job_res->tasks_per_node[n] < job_ptr->details->ntasks_per_node)
+		return false;
+	if (log_error || (select_debug_flags & DEBUG_FLAG_SELECT_TYPE))
+		log_var(log_lvl,
+			"%s: %s over tasks_per_node for %pJ node:%u task_per_node:%d max:%" PRIu16,
+			__func__, tag, job_ptr, n, job_res->tasks_per_node[n],
+			job_ptr->details->ntasks_per_node);
+	return true;
+}
 /*
 * _compute_task_c_b_task_dist - compute the number of tasks on each
 * of the node for the cyclic and block distribution. We need to do
@@ -693,6 +716,8 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr,
 				break;
 			if (!_tres_tasks_avail(gres_task_limit, job_res, n))
 				break;
+			if (_at_tpn_limit(n, job_ptr, "fill allocated", false))
+				break;
 			tid++;
 			job_res->tasks_per_node[n]++;
 			for (l = 0; l < job_ptr->details->cpus_per_task; l++) {
@@ -737,6 +762,9 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr,
 				if (!_tres_tasks_avail(gres_task_limit,
 						       job_res, n))
 					break;
+				if (_at_tpn_limit(n, job_ptr, "fill additional",
+						  false))
+					break;
 				tid++;
 				job_res->tasks_per_node[n]++;
 				for (l = 0; l < job_ptr->details->cpus_per_task;
@@ -767,6 +795,9 @@ static int _compute_c_b_task_dist(struct job_record *job_ptr,
 			if (test_tres_tasks &&
 			    !_tres_tasks_avail(gres_task_limit, job_res, n))
 				continue;
+			if (_at_tpn_limit(n, job_ptr, "fill non-dedicated CPUs",
+					  true))
+				continue;
 			more_tres_tasks = true;
 			tid++;
 			job_res->tasks_per_node[n]++;