From 281c5b2313f349c784d426da20d07cb0bb5c831d Mon Sep 17 00:00:00 2001 From: Morris Jette <jette@schedmd.com> Date: Fri, 3 Jun 2011 10:41:42 -0700 Subject: [PATCH] Add --enable-salloc-kill-cmd build option Add a configure option of --enable-salloc-kill-cmd which will cause the salloc command to signal its child processes when the job ends. Job signalling will be the default for Cray systems. Jobs will not be signalled by default on other systems. SIGHUP will be used for interactive jobs and SIGTERM will be used for other jobs. --- auxdir/x_ac_cray.m4 | 1 + auxdir/x_ac_debug.m4 | 18 +++++++++++ config.h.in | 3 ++ configure | 72 +++++++++++++++++++++++++++++++------------ doc/man/man1/salloc.1 | 4 ++- src/salloc/salloc.c | 9 ++++-- 6 files changed, 83 insertions(+), 24 deletions(-) diff --git a/auxdir/x_ac_cray.m4 b/auxdir/x_ac_cray.m4 index 816b5a2d0db..c01d3aaba37 100644 --- a/auxdir/x_ac_cray.m4 +++ b/auxdir/x_ac_cray.m4 @@ -79,6 +79,7 @@ AC_DEFUN([X_AC_CRAY], AC_DEFINE(SYSTEM_DIMENSIONS, 3, [3-dimensional architecture]) AC_DEFINE(HAVE_FRONT_END, 1, [Define to 1 if running slurmd on front-end only]) AC_DEFINE(HAVE_CRAY, 1, [Define to 1 for Cray XT/XE systems]) + AC_DEFINE(SALLOC_KILL_CMD, 1, [Define to 1 for salloc to kill child processes at job termination]) AC_DEFINE(SALLOC_RUN_FOREGROUND, 1, [Define to 1 to require salloc execution in the foreground.]) fi AM_CONDITIONAL(HAVE_CRAY, test "$ac_have_cray" = "yes") diff --git a/auxdir/x_ac_debug.m4 b/auxdir/x_ac_debug.m4 index 27345408943..37658f01c95 100644 --- a/auxdir/x_ac_debug.m4 +++ b/auxdir/x_ac_debug.m4 @@ -92,6 +92,24 @@ AC_DEFUN([X_AC_DEBUG], [ fi AC_MSG_RESULT([${x_ac_partial_attach=no}]) + AC_MSG_CHECKING([whether salloc should kill child processes at job termination]) + AC_ARG_ENABLE( + [salloc-kill-cmd], + AS_HELP_STRING(--enable-salloc-kill-cmd,salloc should kill child processes at job termination), + [ case "$enableval" in + yes) x_ac_salloc_kill_cmd=yes ;; + no) x_ac_salloc_kill_cmd=no ;; + *) 
AC_MSG_RESULT([doh!]) + AC_MSG_ERROR([bad value "$enableval" for --enable-salloc-kill-cmd]) ;; + esac + ] + ) + if test "$x_ac_salloc_kill_cmd" = yes; then + AC_DEFINE(SALLOC_KILL_CMD, 1, [Define to 1 for salloc to kill child processes at job termination]) + AC_MSG_RESULT([yes]) + else + AC_MSG_RESULT([no]) + fi AC_MSG_CHECKING([whether to disable salloc execution in the background]) AC_ARG_ENABLE( diff --git a/config.h.in b/config.h.in index b11ca97f784..e7c7e0a37ba 100644 --- a/config.h.in +++ b/config.h.in @@ -404,6 +404,9 @@ /* Define the project's release. */ #undef RELEASE +/* Define to 1 for salloc to kill child processes at job termination */ +#undef SALLOC_KILL_CMD + /* Define to 1 to require salloc execution in the foreground. */ #undef SALLOC_RUN_FOREGROUND diff --git a/configure b/configure index 9e9d622de3a..4d0b61fe37b 100755 --- a/configure +++ b/configure @@ -1039,6 +1039,7 @@ enable_debug enable_memory_leak_debug enable_front_end enable_partial_attach +enable_salloc_kill_cmd enable_salloc_background with_slurmctld_port with_slurmd_port @@ -1718,6 +1719,9 @@ Optional Features: --enable-front-end enable slurmd operation on a front-end --disable-partial-attach disable debugger partial task attach support + --enable-salloc-kill-cmd + salloc should kill child processes at job + termination --disable-salloc-background disable salloc execution in the background --enable-multiple-slurmd @@ -7675,13 +7679,13 @@ if test "${lt_cv_nm_interface+set}" = set; then : else lt_cv_nm_interface="BSD nm" echo "int some_variable = 0;" > conftest.$ac_ext - (eval echo "\"\$as_me:7678: $ac_compile\"" >&5) + (eval echo "\"\$as_me:7682: $ac_compile\"" >&5) (eval "$ac_compile" 2>conftest.err) cat conftest.err >&5 - (eval echo "\"\$as_me:7681: $NM \\\"conftest.$ac_objext\\\"\"" >&5) + (eval echo "\"\$as_me:7685: $NM \\\"conftest.$ac_objext\\\"\"" >&5) (eval "$NM \"conftest.$ac_objext\"" 2>conftest.err > conftest.out) cat conftest.err >&5 - (eval echo "\"\$as_me:7684: output\"" 
>&5) + (eval echo "\"\$as_me:7688: output\"" >&5) cat conftest.out >&5 if $GREP 'External.*some_variable' conftest.out > /dev/null; then lt_cv_nm_interface="MS dumpbin" @@ -8886,7 +8890,7 @@ ia64-*-hpux*) ;; *-*-irix6*) # Find out which ABI we are using. - echo '#line 8889 "configure"' > conftest.$ac_ext + echo '#line 8893 "configure"' > conftest.$ac_ext if { { eval echo "\"\$as_me\":${as_lineno-$LINENO}: \"$ac_compile\""; } >&5 (eval $ac_compile) 2>&5 ac_status=$? @@ -10674,11 +10678,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:10677: $lt_compile\"" >&5) + (eval echo "\"\$as_me:10681: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:10681: \$? = $ac_status" >&5 + echo "$as_me:10685: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -11013,11 +11017,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:11016: $lt_compile\"" >&5) + (eval echo "\"\$as_me:11020: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:11020: \$? = $ac_status" >&5 + echo "$as_me:11024: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. @@ -11118,11 +11122,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:11121: $lt_compile\"" >&5) + (eval echo "\"\$as_me:11125: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? 
cat out/conftest.err >&5 - echo "$as_me:11125: \$? = $ac_status" >&5 + echo "$as_me:11129: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -11173,11 +11177,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:11176: $lt_compile\"" >&5) + (eval echo "\"\$as_me:11180: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:11180: \$? = $ac_status" >&5 + echo "$as_me:11184: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -13557,7 +13561,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 13560 "configure" +#line 13564 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -13653,7 +13657,7 @@ else lt_dlunknown=0; lt_dlno_uscore=1; lt_dlneed_uscore=2 lt_status=$lt_dlunknown cat > conftest.$ac_ext <<_LT_EOF -#line 13656 "configure" +#line 13660 "configure" #include "confdefs.h" #if HAVE_DLFCN_H @@ -15609,11 +15613,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15612: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15616: $lt_compile\"" >&5) (eval "$lt_compile" 2>conftest.err) ac_status=$? cat conftest.err >&5 - echo "$as_me:15616: \$? = $ac_status" >&5 + echo "$as_me:15620: \$? = $ac_status" >&5 if (exit $ac_status) && test -s "$ac_outfile"; then # The compiler can only warn and ignore the option if not recognized # So say no if there are warnings other than the usual output. 
@@ -15708,11 +15712,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15711: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15715: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:15715: \$? = $ac_status" >&5 + echo "$as_me:15719: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -15760,11 +15764,11 @@ else -e 's:.*FLAGS}\{0,1\} :&$lt_compiler_flag :; t' \ -e 's: [^ ]*conftest\.: $lt_compiler_flag&:; t' \ -e 's:$: $lt_compiler_flag:'` - (eval echo "\"\$as_me:15763: $lt_compile\"" >&5) + (eval echo "\"\$as_me:15767: $lt_compile\"" >&5) (eval "$lt_compile" 2>out/conftest.err) ac_status=$? cat out/conftest.err >&5 - echo "$as_me:15767: \$? = $ac_status" >&5 + echo "$as_me:15771: \$? = $ac_status" >&5 if (exit $ac_status) && test -s out/conftest2.$ac_objext then # The compiler can only warn and ignore the option if not recognized @@ -19876,6 +19880,9 @@ $as_echo "#define HAVE_FRONT_END 1" >>confdefs.h $as_echo "#define HAVE_CRAY 1" >>confdefs.h +$as_echo "#define SALLOC_KILL_CMD 1" >>confdefs.h + + $as_echo "#define SALLOC_RUN_FOREGROUND 1" >>confdefs.h fi @@ -20070,6 +20077,31 @@ $as_echo "#define DEBUGGER_PARTIAL_ATTACH 1" >>confdefs.h { $as_echo "$as_me:${as_lineno-$LINENO}: result: ${x_ac_partial_attach=no}" >&5 $as_echo "${x_ac_partial_attach=no}" >&6; } + { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether salloc should kill child processes at job termination" >&5 +$as_echo_n "checking whether salloc should kill child processes at job termination... " >&6; } + # Check whether --enable-salloc-kill-cmd was given. 
+if test "${enable_salloc_kill_cmd+set}" = set; then : + enableval=$enable_salloc_kill_cmd; case "$enableval" in + yes) x_ac_salloc_kill_cmd=yes ;; + no) x_ac_salloc_kill_cmd=no ;; + *) { $as_echo "$as_me:${as_lineno-$LINENO}: result: doh!" >&5 +$as_echo "doh!" >&6; } + as_fn_error $? "bad value \"$enableval\" for --enable-salloc-kill-cmd" "$LINENO" 5 ;; + esac + + +fi + + if test "$x_ac_salloc_kill_cmd" = yes; then + +$as_echo "#define SALLOC_KILL_CMD 1" >>confdefs.h + + { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +$as_echo "yes" >&6; } + else + { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +$as_echo "no" >&6; } + fi { $as_echo "$as_me:${as_lineno-$LINENO}: checking whether to disable salloc execution in the background" >&5 $as_echo_n "checking whether to disable salloc execution in the background... " >&6; } diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 47c6de19a40..2d9072106fc 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -452,7 +452,9 @@ your command any time that the SLURM controller tells salloc that its job allocation has been revoked. The job allocation can be revoked for a couple of reasons: someone used \fBscancel\fR to revoke the allocation, or the allocation reached its time limit. If you do not specify a signal -name or number, the default signal is SIGTERM. +name or number and SLURM is configured to signal the spawned command at job +termination, the default signal is SIGHUP for interactive and SIGTERM for +non\-interactive sessions. 
.TP \fB\-k\fR, \fB\-\-no\-kill\fR diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 8d47242c006..c79fd2ec67d 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -831,11 +831,14 @@ static void _job_complete_handler(srun_job_complete_msg_t *comp) if (tpgid != command_pid && tpgid != getpgrp()) killpg(tpgid, SIGHUP); } -#ifdef HAVE_CRAY - signal = SIGTERM; -#else + if (opt.kill_command_signal_set) signal = opt.kill_command_signal; +#ifdef SALLOC_KILL_CMD + else if (is_interactive) + signal = SIGHUP; + else + signal = SIGTERM; #endif if (signal) { verbose("Sending signal %d to command \"%s\"," -- GitLab