From 03a92da4ac02313d0cade8ad3935a57379b714a8 Mon Sep 17 00:00:00 2001
From: jette <jette@schedmd.com>
Date: Thu, 11 Jul 2013 13:13:56 -0700
Subject: [PATCH] Minor restructuring of process signaling logic with deferral
 for core dumps

The most significant change is that we don't signal individual processes
unless there are some processes in the step which are currently core
dumping. This change signals the processes closer together in time
when possible.
---
 src/slurmd/common/proctrack.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/src/slurmd/common/proctrack.c b/src/slurmd/common/proctrack.c
index 30f7f573933..5d170548aa9 100644
--- a/src/slurmd/common/proctrack.c
+++ b/src/slurmd/common/proctrack.c
@@ -264,7 +264,6 @@ static void *_sig_agent(void *args)
 		int i, npids = 0, hung_pids = 0;
 		char *stat_fname = NULL;
 
-		sleep(5);
 		if (slurm_container_get_pids(agent_arg_ptr->cont_id, &pids,
 					     &npids) == SLURM_SUCCESS) {
 			hung_pids = 0;
@@ -277,14 +276,15 @@ static void *_sig_agent(void *args)
 					      (int) pids[i]);
 					hung_pids++;
 				} else {
+					/* Kill processes that we can now */
 					kill(pids[i], agent_arg_ptr->signal);
-					pids[i] = 0;
 				}
 				xfree(stat_fname);
 			}
 		}
 		if (hung_pids == 0)
 			break;
+		sleep(5);
 	}
 
 	(void) (*(ops.signal)) (agent_arg_ptr->cont_id, agent_arg_ptr->signal);
@@ -347,7 +347,7 @@ extern int slurm_container_signal(uint64_t cont_id, int signal)
 						      (int) pids[i]);
 						hung_pids++;
 					} else {
-						kill(pids[i], signal);
+						/* Don't test this PID again */
 						pids[i] = 0;
 					}
 					xfree(stat_fname);
@@ -357,8 +357,8 @@ extern int slurm_container_signal(uint64_t cont_id, int signal)
 			}
 			xfree(pids);
 			if (hung_pids) {
-				info("Defering sending signal to processes "
-				     "currently core dumping");
+				info("Defering sending signal, processes in "
+				     "job are currently core dumping");
 				_spawn_signal_thread(cont_id, signal);
 				return SLURM_SUCCESS;
 			}
-- 
GitLab