diff --git a/NEWS b/NEWS index 7f04aab311dd73e65cf2ad47e8a92b6c25175357..4e92c69dafa35523bae4bc356224c3380f4784d7 100644 --- a/NEWS +++ b/NEWS @@ -77,6 +77,8 @@ documents those changes that are of interest to users and administrators. -- BGQ - Fix regression in 9cc4ae8add7f where blocks would be deleted on static/overlap systems when some hardware issue happens when restarting the slurmctld. + -- Log if CLOUD node configured without a resume/suspend program or suspend + time. * Changes in Slurm 15.08.6 ========================== diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 5597912dfa30e53d43a177d03d720d6f34be08f8..3402d4025e44c7d75f67d47c902ec7d6dc459082 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -2240,7 +2240,7 @@ in power save mode is assigned work to perform. For reasons of reliability, \fBResumeProgram\fR may execute more than once for a node when the \fBslurmctld\fR daemon crashes and is restarted. If \fBResumeProgram\fR is unable to restore a node to service, it should -requeue any node associated with the node and set the node state to DRAIN. +requeue any job associated with the node and set the node state to DRAIN. The program executes as \fBSlurmUser\fR. 
The argument to the program will be the names of nodes to be removed from power savings mode (using Slurm's hostlist diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 2a1815061a76a9d3fd6ffb21a6498b7ce03576fc..786d002ddd313280a47dfb5559b8ffd5978645a3 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -1151,8 +1151,17 @@ void set_slurmd_addr (void) continue; if (IS_NODE_FUTURE(node_ptr)) continue; - if (IS_NODE_CLOUD(node_ptr) && IS_NODE_POWER_SAVE(node_ptr)) + if (IS_NODE_CLOUD(node_ptr)) { + if (slurmctld_conf.suspend_time < 1 || + slurmctld_conf.resume_program == NULL || + slurmctld_conf.suspend_program == NULL) + error("%s: Node %s configured with CLOUD state but " + "missing any of SuspendTime, SuspendProgram " + "or ResumeProgram options",__func__, + node_ptr->name); + if (IS_NODE_POWER_SAVE(node_ptr)) continue; + } if (node_ptr->port == 0) node_ptr->port = slurmctld_conf.slurmd_port; slurm_set_addr(&node_ptr->slurm_addr, node_ptr->port, diff --git a/testsuite/dejagnu.h b/testsuite/dejagnu.h index 0d47fdfa176136be2ec6113d0b5f7b1791d131f8..e6762abf4025f9266ee20e745c68ae8107db14c1 100644 --- a/testsuite/dejagnu.h +++ b/testsuite/dejagnu.h @@ -1,271 +1,316 @@ -/* - * Copyright (C) 2000, 2001 Free Software Foundation, Inc. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
- */ +/* DejaGnu unit testing header. + Copyright (C) 2000, 2001, 2002, 2004, 2006 Free Software + Foundation, Inc. + +This file is part of DejaGnu. + +DejaGnu is free software; you can redistribute it and/or modify it +under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 3 of the License, or +(at your option) any later version. + +DejaGnu is distributed in the hope that it will be useful, but +WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +General Public License for more details. + +You should have received a copy of the GNU General Public License +along with DejaGnu; if not, write to the Free Software Foundation, +Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. */ #ifndef __DEJAGNU_H__ #define __DEJAGNU_H__ #include <stdio.h> -#include <stdarg.h> +#include <stdarg.h> #include <string.h> -#define _BUFFER_SIZE_ 512 +/* If you have problems with DejaGnu dropping failed, untested, or + * unresolved messages generated by a unit testcase, then: */ + +/* #define _DEJAGNU_WAIT_ */ -static int passed = 0; -static int failed = 0; -static int untest = 0; -static int unresolve = 0; -static int tests = 0; +#ifdef _DEJAGNU_WAIT_ +#include <sys/time.h> +#include <sys/types.h> +#include <unistd.h> +#endif -static char buffer[ _BUFFER_SIZE_ ]; +static int passed; +static int failed; +static int untest; +static int unresolve; +static int xfailed; +//static int xpassed; +static char buffer[512]; -inline void -pass (const char* fmt, ... ) __attribute__ ((format (printf, 1, 2))); -inline void -pass (const char* fmt, ... 
) { - va_list ap; +void +wait (void) +{ +#ifdef _DEJAGNU_WAIT_ + fd_set rfds; + struct timeval tv; - tests++; - passed++; - va_start( ap, fmt ); - vsnprintf( buffer, _BUFFER_SIZE_, fmt, ap ); - va_end( ap ); - printf ("\tPASSED: #%d %s\n", tests, buffer ); - fflush( stdout ); + FD_ZERO (&rfds); + tv.tv_sec = 0; + tv.tv_usec = 1; + + select (0, &rfds, NULL, NULL, &tv); +#endif } -inline void -fail (const char* fmt, ... ) __attribute__ ((format (printf, 1, 2))); -inline void -fail (const char* fmt, ... ) { - va_list ap; +static inline void +pass (const char* fmt, ...) +{ + va_list ap; + + passed++; + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tPASSED: %s\n", buffer); + wait (); +} - tests++; - failed++; - va_start( ap, fmt ); - vsnprintf( buffer, _BUFFER_SIZE_, fmt, ap ); - va_end( ap ); - printf ("\tFAILED: #%d %s\n", tests, buffer ); - fflush( stdout ); +static inline void +xpass (const char* fmt, ...) +{ + va_list ap; + + passed++; + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tXPASSED: %s\n", buffer); + wait (); } -inline void -untested (const char* fmt, ... ) __attribute__ ((format (printf, 1, 2))); -inline void -untested (const char* fmt, ... ) { - va_list ap; +static inline void +fail (const char* fmt, ...) +{ + va_list ap; + + failed++; + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tFAILED: %s\n", buffer); + wait (); +} - tests++; - untest++; - va_start( ap, fmt ); - vsnprintf( buffer, _BUFFER_SIZE_, fmt, ap ); - va_end( ap ); - printf ("\tUNTESTED: #%d %s\n", tests, buffer ); - fflush( stdout ); +static inline void +xfail (const char* fmt, ...) +{ + va_list ap; + + xfailed++; /* expected failure: keep it out of the real-failure count */ + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tXFAILED: %s\n", buffer); + wait (); } -inline void -unresolved (const char* fmt, ...
) __attribute__ ((format (printf, 1, 2))); -inline void -unresolved (const char* fmt, ... ) { - va_list ap; +static inline void +untested (const char* fmt, ...) +{ + va_list ap; + + untest++; + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tUNTESTED: %s\n", buffer); + wait (); +} - tests++; - unresolve++; - va_start( ap, fmt ); - vsnprintf( buffer, _BUFFER_SIZE_, fmt, ap ); - va_end( ap ); - printf ("\tUNRESOLVED: #%d %s\n", tests, buffer ); - fflush( stdout ); +static inline void +unresolved (const char* fmt, ...) +{ + va_list ap; + + unresolve++; + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tUNRESOLVED: %s\n", buffer); + wait (); } -inline void -note (const char* fmt, ... ) __attribute__ ((format (printf, 1, 2))); -inline void -note (const char* fmt, ... ) { - va_list ap; - - va_start( ap, fmt ); - vsnprintf( buffer, _BUFFER_SIZE_, fmt, ap ); - va_end( ap ); - printf ("\tNOTE: %s\n", buffer ); - fflush( stdout ); +static inline void +note (const char* fmt, ...) 
+{ + va_list ap; + + va_start (ap, fmt); + vsnprintf (buffer, sizeof (buffer), fmt, ap); + va_end (ap); + printf ("\tNOTE: %s\n", buffer); + wait (); } -inline void -totals (void) { - printf ("\nTotals:\n"); - printf ("\t#passed:\t\t%d\n", passed); - printf ("\t#failed:\t\t%d\n", failed); - if (untest) - printf ("\t#untested:\t\t%d\n", untest); - if (unresolve) - printf ("\t#unresolved:\t\t%d\n", unresolve); +static inline void +totals (void) +{ + printf ("\nTotals:\n"); + printf ("\t#passed:\t\t%d\n", passed); + printf ("\t#real failed:\t\t%d\n", failed); + if (xfailed) + printf ("\t#expected failures:\t\t%d\n", xfailed); + if (untest) + printf ("\t#untested:\t\t%d\n", untest); + if (unresolve) + printf ("\t#unresolved:\t\t%d\n", unresolve); } #ifdef __cplusplus - #include <iostream> #include <iomanip> #include <fstream> #include <string> -#if 0 -#if HAVE_STL3 -#include <sstream> -#else -#include <strstream> -#endif -#endif const char *outstate_list[] = { - "FAILED: ", - "PASSED: ", - "UNTESTED: ", - "UNRESOLVED: " + "FAILED: ", "PASSED: ", "UNTESTED: ", "UNRESOLVED: ", "XFAILED: ", "XPASSED: " }; const char ** outstate = outstate_list; -#if 0 -extern ios& __iomanip_testout (ios&, int); -inline smanip<int> testout (int n) { - return smanip<int> (__iomanip_testout, n); -} -ios & __iomanip_testout (ios& i, int x) { - return i; -} - -template<class T> -class OMANIP { - private: - T i; - ostream &(*f)(ostream&, T); - public: - OMANIP(ostream& (*ff)(ostream&, T), T ii) : f(ff), i(ii) { - } - friend ostream operator<<(ostream& us, OMANIP& m) { - return m.f(os,m.i); - } -}; - -ostream& -freakout(ostream& os, int x) { - return os << "FREAKOUT" ; - // return x << "TESTOUT " << x ; -} - -OMANIP<int> testout(int i) { - return OMANIP<int>(&freakout,i); -} -#endif - -enum teststate {FAILED, PASSED,UNTESTED,UNRESOLVED} laststate; +enum teststate { FAILED, PASSED, UNTESTED, UNRESOLVED, XFAILED, XPASSED} laststate; class TestState { - private: - teststate laststate; - std::string 
lastmsg; - public: - TestState(void) { - passed = 0; - failed = 0; - untest = 0; - unresolve = 0; - } - ~TestState(void) { - totals(); - }; - - void testrun (bool b, std::string s) { - if (b) - pass (s); - else - fail (s); - } - - void pass (std::string s) { - passed++; - laststate = PASSED; - lastmsg = s; - std::cout << "\t" << outstate[PASSED] << s << std::endl; - } - void pass (const char *c) { - std::string s = c; - pass (s); - } - - void fail (std::string s) { - failed++; - laststate = FAILED; - lastmsg = s; - std::cout << "\t" << outstate[FAILED] << s << std::endl; - } - void fail (const char *c) { - std::string s = c; - fail (s); - } - - void untested (std::string s) { - untest++; - laststate = UNTESTED; - lastmsg = s; - std::cout << "\t" << outstate[UNTESTED] << s << std::endl; - } - void untested (const char *c) { - std::string s = c; - untested (s); - } - - void unresolved (std::string s) { - unresolve++; - laststate = UNRESOLVED; - lastmsg = s; - std::cout << "\t" << outstate[UNRESOLVED] << s << std::endl; - } - void unresolved (const char *c) { - std::string s = c; - unresolved (s); - } - - void totals (void) { - std::cout << "\t#passed:\t\t" << passed << std::endl; - std::cout << "\t#failed:\t\t" << failed << std::endl; - if (untest) - std::cout << "\t#untested:\t\t" << untest << std::endl; - if (unresolve) - std::cout << "\t#unresolved:\t\t" << unresolve << std::endl; - } - - // This is so this class can be printed in an ostream. 
- friend std::ostream & operator << (std::ostream &os, TestState& t) { - return os << "\t" << outstate[t.laststate] << t.lastmsg ; - } - - int GetState(void) { - return laststate; - } - std::string GetMsg(void) { - return lastmsg; - } -}; + private: + teststate laststate; + std::string lastmsg; + public: + TestState (void) + { + passed = 0; + failed = 0; + untest = 0; + xpassed = 0; + xfailed = 0; + unresolve = 0; + } + + ~TestState (void) { totals(); } + + void testrun (bool b, std::string s) + { + if (b) + pass (s); + else + fail (s); + } + + void pass (std::string s) + { + passed++; + laststate = PASSED; + lastmsg = s; + std::cout << "\t" << outstate[PASSED] << s << std::endl; + } + + void pass (const char *c) + { + std::string s = c; + pass (s); + } + + void xpass (std::string s) + { + xpassed++; + laststate = XPASSED; /* record the unexpected pass, as xfail() records XFAILED */ + lastmsg = s; + std::cout << "\t" << outstate[XPASSED] << s << std::endl; + } + + void xpass (const char *c) + { + std::string s = c; + xpass (s); + } + + void fail (std::string s) + { + failed++; + laststate = FAILED; + lastmsg = s; + std::cout << "\t" << outstate[FAILED] << s << std::endl; + } + + void fail (const char *c) + { + std::string s = c; + fail (s); + } + + void xfail (std::string s) + { + xfailed++; + laststate = XFAILED; + lastmsg = s; + std::cout << "\t" << outstate[XFAILED] << s << std::endl; + } + + void xfail (const char *c) + { + std::string s = c; + xfail (s); + } + + void untested (std::string s) + { + untest++; + laststate = UNTESTED; + lastmsg = s; + std::cout << "\t" << outstate[UNTESTED] << s << std::endl; + } + + void untested (const char *c) + { + std::string s = c; + untested (s); + } + + void unresolved (std::string s) + { + unresolve++; + laststate = UNRESOLVED; + lastmsg = s; + std::cout << "\t" << outstate[UNRESOLVED] << s << std::endl; + } + + void unresolved (const char *c) + { + std::string s = c; + unresolved (s); + } + + void totals (void) + { + std::cout << "\t#passed:\t\t" << passed << std::endl; + std::cout
<< "\t#real failed:\t\t" << failed << std::endl; + if (xfailed) + std::cout << "\t#expected failures:\t\t" << xfailed << std::endl; + if (xpassed) + std::cout << "\t#unexpected passes:\t\t" << xpassed << std::endl; + if (untest) + std::cout << "\t#untested:\t\t" << untest << std::endl; + if (unresolve) + std::cout << "\t#unresolved:\t\t" << unresolve << std::endl; + } -#endif // __cplusplus -#endif // _DEJAGNU_H_ + // This is so this class can be printed in an ostream. + friend std::ostream & operator << (std::ostream &os, TestState& t) + { + return os << "\t" << outstate[t.laststate] << t.lastmsg ; + } + int GetState (void) { return laststate; } + std::string GetMsg (void) { return lastmsg; } +}; +#endif /* __cplusplus */ +#endif /* __DEJAGNU_H__ */ diff --git a/testsuite/expect/globals b/testsuite/expect/globals index fe01fa96bbbf50d479b7a3c07e4a734485c42189..9bde0adc87e9c94ac2919de499772597606f7951 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -3560,3 +3560,40 @@ proc available_nodes_hostnames { partition } { log_user 1 return $idle_nodelist } + +##################################################################### +# +# Proc: test_accting_steps +# +# Purpose: Determine if nosteps or nojobs is set for +# AccountingStorageEnforce +# +# Returns: 0 if nosteps or nojobs is set, else 1 +# +##################################################################### + +proc test_accting_steps { } { + + global scontrol alpha_numeric_comma + log_user 0 + set enforce_limits 1 + spawn $scontrol show config + expect { + -re "AccountingStorageEnforce *= ($alpha_numeric_comma)" { + if {[string first "nosteps" $expect_out(1,string)] != -1 } { + set enforce_limits 0 + } + if {[string first "nojobs" $expect_out(1,string)] != -1 } { + set enforce_limits 0 + } + exp_continue + } + eof { + wait + } + + } + log_user 1 + + return $enforce_limits +} diff --git a/testsuite/expect/test1.75 b/testsuite/expect/test1.75 index
2b1203be2c2ec526138baa6b0e181ba3cb52dbfc..29246e222568e46e6bb99b14ec7c87bdb31c75a7 100755 --- a/testsuite/expect/test1.75 +++ b/testsuite/expect/test1.75 @@ -73,6 +73,11 @@ if {[test_cpu_affinity_or_cgroup] == 0} { send_user "\nWARNING: This test requires some form of task affinity\n" exit 0 } +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} proc sub_job { freq } { diff --git a/testsuite/expect/test12.2 b/testsuite/expect/test12.2 index 429afbeb32bbd7bf9f27986a69ef037cb62d920c..13bd7e2d6b824a189d374f1c48cd3fb0494382e2 100755 --- a/testsuite/expect/test12.2 +++ b/testsuite/expect/test12.2 @@ -54,6 +54,11 @@ if {[test_front_end]} { send_user "\nWARNING: This test is incompatible with front-end systems\n" exit $exit_code } +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} if {[test_launch_poe]} { # Allow extra time and memory for the POE process diff --git a/testsuite/expect/test12.7 b/testsuite/expect/test12.7 index 10fd3e84e6c66316fe9de26b19c0b293bcca5c41..c52aa674d077a2aa67dbbee96e3f6cac9c20528a 100755 --- a/testsuite/expect/test12.7 +++ b/testsuite/expect/test12.7 @@ -39,6 +39,11 @@ set file_in "test$test_id\_sc" print_header $test_id +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} if { [test_super_user] == 0 } { send_user "WARNING: Test can only be run as SlurmUser\n" exit $exit_code diff --git a/testsuite/expect/test12.8 b/testsuite/expect/test12.8 index 0d650b9abe7249f92c63c02924651345d2fd7cd7..452c7d438614e8def807d245ba23eb46df4358dd 100755 --- a/testsuite/expect/test12.8 +++ b/testsuite/expect/test12.8 @@ -38,6 +38,11 @@ set file_in "test$test_id\_sc" print_header $test_id +if {![test_accting_steps]} { + send_user "\nWARNING: This test 
can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} if {[test_using_slurmdbd] == 0} { send_user "\nWARNING: This test requires use of Slurmdbd\n" exit $exit_code diff --git a/testsuite/expect/test14.10 b/testsuite/expect/test14.10 index 36bb6da1666d015bae5c0b5e1ef8a79c9efc4f5d..b5e638f67d70ff4136f239c2567d2cd740d6b648 100755 --- a/testsuite/expect/test14.10 +++ b/testsuite/expect/test14.10 @@ -41,6 +41,11 @@ set node2 "" print_header $test_id +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatible with front-end systems\n" exit 0 diff --git a/testsuite/expect/test14.6 b/testsuite/expect/test14.6 index cb55666d4da6c6df8413d6c92b31e926a9d1d7eb..3ebf0319cf7f28f0447765b5dacab4bcf530714b 100755 --- a/testsuite/expect/test14.6 +++ b/testsuite/expect/test14.6 @@ -42,6 +42,11 @@ set job_id 0 print_header $test_id +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatible with front-end systems\n" exit 0 diff --git a/testsuite/expect/test7.13 b/testsuite/expect/test7.13 index ef5f681e73c981f59f9a9d48c261b30321f4c4b3..8ba353cd4dd8c90e6eeeda075825f06cfd4d4212 100755 --- a/testsuite/expect/test7.13 +++ b/testsuite/expect/test7.13 @@ -39,6 +39,11 @@ set file_prog2 "test$test_id.prog2" print_header $test_id +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} if {[test_launch_poe]} { send_user "\nWARNING: This test is incompatible with launch/poe systems\n" exit 0 diff --git a/testsuite/expect/test7.9 b/testsuite/expect/test7.9 index
bfbfd011da766fd6666819119887c3c95f73e3e2..3e7360801f4776df826f28a18969e4dd2044a42f 100755 --- a/testsuite/expect/test7.9 +++ b/testsuite/expect/test7.9 @@ -42,6 +42,11 @@ set iterations 50 print_header $test_id +if {![test_accting_steps]} { + send_user "\nWARNING: This test can not be run with nosteps or nojobs " + send_user "(AccountingStorageEnforce)\n" + exit 0 +} # # Test is incompatible with proctrack/aix, proctrack/rms, proctrack/sgi_job, # proctrack/cray, and switch/elan