test1.46 3.16 KiB
#!/usr/bin/expect
############################################################################
# Purpose: Test of SLURM functionality
# Test of srun's --kill-on-bad-exit option.
#
# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR
# "FAILURE: ..." otherwise with an explanation of the failure, OR
# anything else indicates a failure mode that must be investigated.
#
# Note: This script generates files in the working directory named
# test1.46.prog.c (C source) and test1.46.prog (the program built from it);
# the built program is removed again when the test succeeds
############################################################################
# Copyright (C) 2002 The Regents of the University of California.
# Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
# Written by Morris Jette <jette1@llnl.gov>
# UCRL-CODE-217948.
#
# This file is part of SLURM, a resource management program.
# For details, see <http://www.llnl.gov/linux/slurm/>.
#
# SLURM is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 2 of the License, or (at your option)
# any later version.
#
# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
# details.
#
# You should have received a copy of the GNU General Public License along
# with SLURM; if not, write to the Free Software Foundation, Inc.,
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA.
############################################################################
# Pull in the shared test-suite definitions. The $bin_* / $srun variables and
# the helper procs used below are not defined in this file, so they are
# presumably provided here.
source ./globals

# Identity of this test and the name of the program it builds
set test_id "1.46"
set file_in "test${test_id}.prog"

# Result tracking: count of expected srun messages and overall exit status
set matches 0
set exit_code 0

print_header $test_id
#
# Delete any left-over test program
# Build the test program from generated C source
#
# We have the last of the tasks exit. proctrack/linuxproc will not
# signal processes named "slurmstepd" and later user tasks could still
# be named "slurmstepd" when the termination signal comes in.
#
# Remove any stale binary, then write the C source for the test program.
# The braces around the C text keep Tcl from substituting anything inside it.
exec $bin_rm -f $file_in
set fd [open "$file_in.c" w]
puts $fd {
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

/* Declared as "int main(void)" with an explicit return: an implicit-int
 * main() draws a compiler diagnostic on stderr, and Tcl's exec treats
 * any stderr output from the build as an error. */
int main(void)
{
	char *id = getenv("SLURM_PROCID");

	/* Not launched by srun: fail cleanly instead of crashing in atoi(NULL) */
	if (id == NULL) {
		fprintf(stderr, "SLURM_PROCID is not set\n");
		exit(1);
	}
	/* Task 9 (the last of the 10 tasks) is the one that exits badly */
	if (atoi(id) == 9) {
		exit(2);
	}
	sleep(15);
	printf("SHOULD_NOT_BE_HERE\n");
	return 0;
}
}
close $fd
# Add delay due to sporadic error "Clock skew detected"
exec $bin_sleep 1
# With an empty makefile, make's built-in rule compiles $file_in.c into $file_in
exec $bin_make -f /dev/null $file_in
exec $bin_chmod 700 $file_in
#
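# Run the test program via srun as 10 tasks on one node. Task 9 exits with
# code 2, and --kill-on-bad-exit should then terminate the remaining tasks
# before any of them prints SHOULD_NOT_BE_HERE.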
#
# Limit how long each expect wait may block before the timeout branch fires
set timeout $max_job_delay
spawn $srun -n10 -N1 -O --kill-on-bad-exit -t1 $file_in
expect {
	-re {exit code 2} {
		# srun reporting the deliberately failing task
		send_user "This error is expected, no worries\n"
		set matches [expr {$matches + 1}]
		exp_continue
	}
	-re {SHOULD_NOT_BE_HERE} {
		# A task outlived its sleep, so it was not killed; push the
		# counter far enough negative that the final check cannot pass
		set matches -10
		exp_continue
	}
	timeout {
		send_user "\nFAILURE: srun not responding\n"
		# kill_srun is a helper defined outside this file
		kill_srun
		set exit_code 1
		exp_continue
	}
	eof {
		# srun has closed its output; reap the spawned process
		wait
	}
}
# The only passing outcome is exactly one "exit code 2" report from srun and
# no SHOULD_NOT_BE_HERE output (which would have driven matches negative).
if {$matches != 1} {
	send_user "\nFAILURE: problem with --kill-on-bad-exit option\n"
	set exit_code 1
}
if {$exit_code == 0} {
	# Remove both generated files (binary and C source) so a passing run
	# leaves nothing behind; on failure they are kept for inspection.
	exec $bin_rm -f $file_in $file_in.c
	send_user "\nSUCCESS\n"
}
exit $exit_code