diff --git a/NEWS b/NEWS index a904c9b0bc7ce80801a78d2e2b71a13e555e035a..836acdd6f6a43f6aeba64697e3114f3aac546339 100644 --- a/NEWS +++ b/NEWS @@ -79,6 +79,14 @@ documents those changes that are of interest to users and admins. configured with more CPUs than actually exist, return a value of TASKS equal to the number of configured CPUs that are allocated to a job rather than the number of physical CPUs allocated. + -- For sched/wiki2, time out "srun --get-user-env ..." command after 3 seconds + if unable to perform pseudo-login and get user environment variables. + -- The pseudo-login (initiated by srun's --get-user-env option) will no longer + execute the user's .login script, just the dot file(s) for spawning the + user's default shell. + -- Add contribs/time_login.c program to test how long pseudo-login takes + for specific users or all users. This can identify users for which Moab + job submissions are unable to set the proper environment variables. * Changes in SLURM 1.2.17 ========================= diff --git a/contribs/Makefile.am b/contribs/Makefile.am index 4ea5912bbe739f5d5a1020a77c893af7b6d97f3a..7e6b666ba19e4e396c30c96a584b4d2f4605dc62 100644 --- a/contribs/Makefile.am +++ b/contribs/Makefile.am @@ -4,4 +4,5 @@ EXTRA_DIST = \ make.slurm.patch \ mpich1.slurm.patch \ ptrace.patch \ + time_login.c \ README diff --git a/contribs/Makefile.in b/contribs/Makefile.in index a477933d1fe76fae94550ac86aab4f944cb6c496..61f1a147c02e66836b4a1e37c6ea3d452e66fc4a 100644 --- a/contribs/Makefile.in +++ b/contribs/Makefile.in @@ -243,6 +243,7 @@ EXTRA_DIST = \ make.slurm.patch \ mpich1.slurm.patch \ ptrace.patch \ + time_login.c \ README all: all-recursive diff --git a/contribs/README b/contribs/README index aa511f34cbd2c8e7120a3510242cbbadd905a1c7..ad9792738294c490ad31f8db94a614f03cb0f91d 100644 --- a/contribs/README +++ b/contribs/README @@ -11,6 +11,12 @@ of the SLURM contribs distribution follows: API to SLURM using perl. 
Making available all SLURM command that exist in the SLURM proper API.
 
+  time_login.c       [ C program ]
+     This program will report how long a pseudo-login will take for specific
+     users or all users on the system. Users identified by this program
+     will not have their environment properly set for jobs submitted through
+     Moab. Build with "make -f /dev/null time_login" and execute as user root.
+
   torque/            [ Wrapper Scripts for Torque migration to SLURM ]
      Helpful scripts to make transition to SLURM easier from PBS or Torque.
      These scripts are easily updatable if there is functionality missing.
diff --git a/contribs/time_login.c b/contribs/time_login.c
new file mode 100644
index 0000000000000000000000000000000000000000..79425f39ccf683b5796abff46f5802402ed74f0f
--- /dev/null
+++ b/contribs/time_login.c
@@ -0,0 +1,256 @@
+/*****************************************************************************\
+ *  This program is used to identify users for whom a pseudo-login takes
+ *  more than SU_WAIT_MSEC to complete. Either enter specific user names
+ *  on the execute line (e.g. "time_login alice bob") or provide no input
+ *  on the execute line to test all users in the /etc/passwd file with a
+ *  UID greater than 100 (avoiding various system users).
+ *
+ *  Users identified for whom the pseudo-login takes too long will not
+ *  have their environment variables set by Moab on job submit, which
+ *  relies upon the srun "--get-user-env" option to get this information.
+ *  See SLURM's env_array_user_default() code in src/common/env.c.
+ *  This option is presently used only by Moab.
+ *****************************************************************************
+ *  Copyright (C) 2007 The Regents of the University of California.
+ *  Produced at Lawrence Livermore National Laboratory (cf, DISCLAIMER).
+ *  Written by Morris Jette <jette1@llnl.gov>.
+ *  UCRL-CODE-226842.
+ *
+ *  This file is part of SLURM, a resource management program.
+ *  For details, see <http://www.llnl.gov/linux/slurm/>.
+ *
+ *  SLURM is free software; you can redistribute it and/or modify it under
+ *  the terms of the GNU General Public License as published by the Free
+ *  Software Foundation; either version 2 of the License, or (at your option)
+ *  any later version.
+ *
+ *  In addition, as a special exception, the copyright holders give permission
+ *  to link the code of portions of this program with the OpenSSL library under
+ *  certain conditions as described in each individual source file, and
+ *  distribute linked combinations including the two. You must obey the GNU
+ *  General Public License in all respects for all of the code used other than
+ *  OpenSSL. If you modify file(s) with this exception, you may extend this
+ *  exception to your version of the file(s), but you are not obligated to do
+ *  so. If you do not wish to do so, delete this exception statement from your
+ *  version. If you delete this exception statement from all source files in
+ *  the program, then also delete it here.
+ *
+ *  SLURM is distributed in the hope that it will be useful, but WITHOUT ANY
+ *  WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ *  FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
+ *  details.
+ *
+ *  You should have received a copy of the GNU General Public License along
+ *  with SLURM; if not, write to the Free Software Foundation, Inc.,
+ *  51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+\*****************************************************************************/
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+
+#define SU_WAIT_MSEC 3000	/* pseudo-login time limit in msec */
+
+static void _parse_line(char *in_line, char **user_name, int *user_id);
+static long int _time_login(char *user_name);
+
+/*
+ * Time the pseudo-login of each user named on the execute line or, if
+ * no names are given, of every /etc/passwd entry with a UID above 100.
+ * When scanning /etc/passwd only users needing at least 80% of
+ * SU_WAIT_MSEC are reported. Must be run as user root.
+ */
+int main(int argc, char **argv)
+{
+	FILE *passwd_fd;
+	char in_line[256], *user_name;
+	int i, user_id;
+	long int delta_t;
+
+	if (geteuid() != (uid_t)0) {
+		printf("need to run as user root\n");
+		exit(1);
+	}
+
+	/* explicit user names: report every result, fast or slow */
+	for (i = 1; i < argc; i++) {
+		delta_t = _time_login(argv[i]);
+		printf("user %-8s time %ld usec\n", argv[i], delta_t);
+	}
+	if (argc > 1)
+		return 0;
+
+	passwd_fd = fopen("/etc/passwd", "r");
+	if (!passwd_fd) {
+		perror("fopen(/etc/passwd)");
+		exit(1);
+	}
+
+	while (fgets(in_line, sizeof(in_line), passwd_fd)) {
+		_parse_line(in_line, &user_name, &user_id);
+		if (user_id <= 100)	/* system user or unusable line */
+			continue;
+		delta_t = _time_login(user_name);
+		/* stay quiet unless within 20% of the time limit */
+		if (delta_t < ((SU_WAIT_MSEC * 0.8) * 1000))
+			continue;
+		printf("user %-8s time %ld usec\n", user_name, delta_t);
+	}
+	fclose(passwd_fd);
+	return 0;
+}
+
+/*
+ * Split one /etc/passwd record ("name:passwd:uid:...") in place.
+ * in_line   IN/OUT - record text, separators are overwritten with '\0'
+ * user_name OUT    - set to the name field within in_line
+ * user_id   OUT    - numeric UID, or 0 if the record can not be parsed
+ * The fields are located with strchr() rather than strtok() because
+ * strtok() merges adjacent separators, so a record with an empty
+ * password field ("name::uid:...") would yield the GID as the UID.
+ */
+static void _parse_line(char *in_line, char **user_name, int *user_id)
+{
+	char *sep, *uid_str, *end;
+	long int uid;
+
+	*user_name = in_line;
+	*user_id = 0;
+
+	sep = strchr(in_line, ':');	/* end of the name field */
+	if (!sep)
+		return;
+	*sep = '\0';
+	sep = strchr(sep + 1, ':');	/* end of the password field */
+	if (!sep)
+		return;
+	uid_str = sep + 1;
+	sep = strchr(uid_str, ':');	/* end of the UID field */
+	if (sep)
+		*sep = '\0';
+
+	errno = 0;
+	uid = strtol(uid_str, &end, 10);
+	if ((end == uid_str) || (*end != '\0') || (errno == ERANGE) ||
+	    (uid < 0) || (uid > INT_MAX))
+		return;
+	*user_id = (int) uid;
+}
+
+/*
+ * Run "/bin/su <user_name> -c ..." and time how long it takes for the
+ * marker line "HELLO" to come back through a pipe.
+ * user_name IN - name of the user to test
+ * RET elapsed time in usec, SU_WAIT_MSEC * 1000 if the marker was not
+ *     seen within SU_WAIT_MSEC, or -1 if the test could not be started
+ */
+static long int _time_login(char *user_name)
+{
+	FILE *su;
+	char line[BUFSIZ];
+	int fildes[2], found = 0, fval, rc, timeleft;
+	pid_t child;
+	struct timeval begin, now;
+	struct pollfd ufds;
+	long int delta_t;
+
+	if (pipe(fildes) < 0) {
+		perror("pipe");
+		return -1;
+	}
+
+	child = fork();
+	if (child == -1) {
+		perror("fork");
+		close(fildes[0]);
+		close(fildes[1]);
+		return -1;
+	}
+	if (child == 0) {
+		/* stdin and stderr to /dev/null, stdout to the pipe */
+		close(fildes[0]);
+		close(0);
+		open("/dev/null", O_RDONLY);
+		dup2(fildes[1], 1);
+		close(2);
+		open("/dev/null", O_WRONLY);
+#if 1
+		/* execute .profile only */
+		execl("/bin/su", "su", user_name, "-c",
+		      "echo; echo; echo HELLO", (char *) NULL);
+#else
+		/* execute .login plus .profile */
+		execl("/bin/su", "su", "-", user_name, "-c",
+		      "echo; echo; echo HELLO", (char *) NULL);
+#endif
+		_exit(127);	/* exec failed */
+	}
+
+	close(fildes[1]);
+	if ((fval = fcntl(fildes[0], F_GETFL, 0)) >= 0)
+		fcntl(fildes[0], F_SETFL, fval | O_NONBLOCK);
+	su = fdopen(fildes[0], "r");
+	if (!su) {
+		perror("fdopen");
+		close(fildes[0]);
+		kill(child, SIGKILL);
+		waitpid(child, NULL, 0);
+		return -1;
+	}
+
+	gettimeofday(&begin, NULL);
+	ufds.fd = fildes[0];
+	ufds.events = POLLIN;
+	while (!found) {
+		gettimeofday(&now, NULL);
+		timeleft = SU_WAIT_MSEC;
+		timeleft -= (now.tv_sec - begin.tv_sec) * 1000;
+		timeleft -= (now.tv_usec - begin.tv_usec) / 1000;
+		if (timeleft <= 0)
+			break;
+		if ((rc = poll(&ufds, 1, timeleft)) <= 0) {
+			if (rc == 0)	/* timeout */
+				break;
+			if ((errno == EINTR) || (errno == EAGAIN))
+				continue;
+			perror("poll");
+			break;
+		}
+		/*
+		 * POLLHUP may be reported together with unread data when
+		 * su exits quickly, so read before giving up on the pipe.
+		 */
+		if (!(ufds.revents & (POLLIN | POLLHUP)))
+			break;
+		clearerr(su);	/* forget EAGAIN from the last pass */
+		while (fgets(line, sizeof(line), su)) {
+			if (!strncmp(line, "HELLO", 5)) {
+				found = 1;
+				break;
+			}
+		}
+		if (!found && feof(su))	/* su exited without the marker */
+			break;
+	}
+	if (found)	/* "now" predates the final poll() wait */
+		gettimeofday(&now, NULL);
+	fclose(su);	/* also closes fildes[0] */
+	if (!found)
+		kill(child, SIGKILL);	/* do not leave a hung login behind */
+	waitpid(child, NULL, 0);
+
+	if (!found)
+		return (SU_WAIT_MSEC * 1000);
+
+	delta_t = (now.tv_sec - begin.tv_sec) * 1000000;
+	delta_t += now.tv_usec - begin.tv_usec;
+	return delta_t;
+}
diff --git a/src/common/env.c b/src/common/env.c
index d56b609a48a29066989c03a48ae702f6693d6cef..c1e13e8635a9f756ff894ee82656c29fd91752b3 100644
--- a/src/common/env.c
+++ b/src/common/env.c
@@ -40,11 +40,13 @@
 #  include "config.h"
 #endif
 
+#include <fcntl.h>
 #include <stdio.h>
 #include <stdarg.h>
 #include <stdlib.h>
 #include <string.h>
 #include <unistd.h>
+#include <sys/poll.h>
 #include <sys/types.h>
 
 #include "src/common/macros.h"
@@ -73,6 +75,8 @@ strong_alias(env_array_append_fmt, slurm_env_array_append_fmt);
 strong_alias(env_array_overwrite, slurm_env_array_overwrite);
 strong_alias(env_array_overwrite_fmt, slurm_env_array_overwrite_fmt);
 
+#define SU_WAIT_MSEC 3000	/* 3000 msec for /bin/su to return user
+ * env vars for --get-user-env option */ /* * Return pointer to `name' entry in environment if found, or * pointer to the last entry (i.e. NULL) if `name' is not @@ -1246,48 +1250,130 @@ char **env_array_user_default(const char *username) char line[BUFSIZ]; char name[BUFSIZ]; char value[BUFSIZ]; - char *cmdstr = xstrdup(""); char **env = NULL; char *starttoken = "XXXXSLURMSTARTPARSINGHEREXXXX"; - char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX"; - int len; + char *stoptoken = "XXXXSLURMSTOPPARSINGHEREXXXXX"; + char cmdstr[256]; + int fildes[2], found, fval, len, rc, timeleft; + pid_t child; + struct timeval begin, now; + struct pollfd ufds; if (geteuid() != (uid_t)0) { info("WARNING: you must be root to use --get-user-env"); return NULL; } - xstrfmtcat(cmdstr, "/bin/su - %s -c \"echo; echo; echo; echo %s; env; echo %s\" 2>/dev/null", - username, starttoken, stoptoken); - su = popen(cmdstr, "r"); - xfree(cmdstr); - if (su == NULL) { + if (pipe(fildes) < 0) { + error("pipe: %m"); return NULL; } - env = env_array_create(); + child = fork(); + if (child == -1) { + error("fork: %m"); + return NULL; + } + if (child == 0) { + close(0); + open("/dev/null", O_RDONLY); + dup2(fildes[1], 1); + close(2); + open("/dev/null", O_WRONLY); + snprintf(cmdstr, sizeof(cmdstr), + "echo; echo; echo; echo %s; env; echo %s", + starttoken, stoptoken); +#if 1 + /* execute .profile only */ + execl("/bin/su", "su", username, "-c", cmdstr, NULL); +#else + /* execute .login plus .profile */ + execl("/bin/su", "su", "-", username, "-c", cmdstr, NULL); +#endif + exit(1); + } + + close(fildes[1]); + if ((fval = fcntl(fildes[0], F_GETFL, 0)) >= 0) + fcntl(fildes[0], F_SETFL, fval | O_NONBLOCK); + su= fdopen(fildes[0], "r"); + + gettimeofday(&begin, NULL); + ufds.fd = fildes[0]; + ufds.events = POLLIN; /* First look for the start token in the output */ len = strlen(starttoken); - while (fgets(line, BUFSIZ, su) != NULL) { - if (0 == strncmp(line, starttoken, len)) { + found = 0; + while (!found) { 
+ gettimeofday(&now, NULL); + timeleft = SU_WAIT_MSEC; + timeleft -= (now.tv_sec - begin.tv_sec) * 1000; + timeleft -= (now.tv_usec - begin.tv_usec) / 1000; + if (timeleft <= 0) + break; + if ((rc = poll(&ufds, 1, timeleft)) <= 0) { + if (rc == 0) { + verbose("timeout waiting for /bin/su to complete"); + break; + } + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + error("poll: %m"); + break; + } + if ((ufds.revents & POLLERR) || (ufds.revents & POLLHUP)) break; + while (fgets(line, BUFSIZ, su)) { + if (!strncmp(line, starttoken, len)) { + found = 1; + break; + } } } + if (!found) { + error("Failed to get user environment variables"); + close(fildes[0]); + return NULL; + } /* Now read in the environment variable strings. */ + env = env_array_create(); len = strlen(stoptoken); - while (fgets(line, BUFSIZ, su) != NULL) { + found = 0; + while (!found) { + gettimeofday(&now, NULL); + timeleft = SU_WAIT_MSEC; + timeleft -= (now.tv_sec - begin.tv_sec) * 1000; + timeleft -= (now.tv_usec - begin.tv_usec) / 1000; + if (timeleft <= 0) + break; + if ((rc = poll(&ufds, 1, timeleft)) <= 0) { + if (rc == 0) { + verbose("timeout waiting for /bin/su to complete"); + break; + } + if ((errno == EINTR) || (errno == EAGAIN)) + continue; + error("poll: %m"); + break; + } /* stop at the line containing the stoptoken string */ - if (0 == strncmp(line, stoptoken, len)) { + if ((ufds.revents & POLLERR) || (ufds.revents & POLLHUP)) break; + while (fgets(line, BUFSIZ, su)) { + if (!strncmp(line, stoptoken, len)) { + found = 1; + break; + } } _strip_cr_nl(line); _env_array_entry_splitter(line, name, BUFSIZ, value, BUFSIZ); env_array_overwrite(&env, name, value); } - pclose(su); + close(fildes[0]); return env; } + diff --git a/testsuite/expect/test1.90 b/testsuite/expect/test1.90 index 673300787ad07a0e75179f2e60e48a208f63b006..9301ca81a1724fbda67d57486932d232f3f5b18a 100755 --- a/testsuite/expect/test1.90 +++ b/testsuite/expect/test1.90 @@ -42,9 +42,14 @@ print_header $test_id # Test 
if memory affinity support is supported. # set affinity 0 +set fast_sched 0 log_user 0 spawn $scontrol show config expect { + -re "FastSchedule *= ($number)" { + set fast_sched $expect_out(1,string) + exp_continue + } -re "task/affinity" { set affinity 1 exp_continue @@ -53,6 +58,10 @@ expect { wait } } +if {$fast_sched > 1} { + send_user "\nWARNING: FastSchedule > 1 not compatable with this test\n" + exit 0 +} spawn ls /usr/include/numa.h expect { -nocase "no such file" { @@ -80,7 +89,7 @@ exec $bin_chmod 700 $file_prog # # Create an allocation # -set salloc_pid [spawn $salloc -N1 --verbose -t2 $bin_bash] +set salloc_pid [spawn $salloc -N1 --exclusive --verbose -t2 $bin_bash] # # Run a job step to get allocated processor count and affinity