diff --git a/COPYING b/COPYING index 4938ad93383257ed594cf5d953e7e31bf5cfdafc..55269c8e3ac4b51299dfaa0e4a1f4d7af084f177 100644 --- a/COPYING +++ b/COPYING @@ -1,5 +1,6 @@ All SLURM code and documentation is available under the GNU General Public -License. +License. Some tools in the "contribs" directory have other licenses. See +the documentation for individual contributed tools for details. In addition, as a special exception, the copyright holders give permission to link the code of portions of this program with the OpenSSL library under diff --git a/NEWS b/NEWS index 0f03347a9236cc6d9f363927b98eca793c42b7d1..94428db409defb16ed4d231ebb72b96eda1f634d 100644 --- a/NEWS +++ b/NEWS @@ -56,6 +56,16 @@ documents those changes that are of interest to users and admins. where slurmd daemon started from. -- Log errors running task prolog or task epilog to srun's output. -- In sched/wiki2, fix bug processing condensed hostlist expressions. + -- Release contribs/mpich1.slurm.patch without GPL license. + -- Fix bug in mvapich plugin for read/write calls that return EAGAIN. + -- Don't start MVAPICH timeout logic until we know that srun is starting + an MVAPICH program. + -- Fix to srun only allocating number of nodes needed for requested task + count when combining allocation and step creation in srun. + -- Execute task-prolog within proctrack container to insure that all + child processes get terminated. + -- Fixed job accounting to work with sgi_job proctrack plugin. + * Changes in SLURM 1.2.15 ========================= diff --git a/contribs/mpich1.slurm.patch b/contribs/mpich1.slurm.patch index 738fb9eda602fcff51d52ad967d28c47274fa087..70990087ad17f3842a1d80776d342f6e3829ff4d 100644 --- a/contribs/mpich1.slurm.patch +++ b/contribs/mpich1.slurm.patch @@ -1,3 +1,56 @@ +This work was produced at the University of California, Lawrence Livermore +National Laboratory (UC LLNL) under contract no. W-7405-ENG-48 (Contract 48) +between the U.S. 
Department of Energy (DOE) and The Regents of the University +of California (University) for the operation of UC LLNL. The rights of the +Federal Government are reserved under Contract 48 subject to the restrictions +agreed upon by the DOE and University as allowed under DOE Acquisition +Letter 97-1. + + +DISCLAIMER + +This work was prepared as an account of work sponsored by an agency of the +United States Government. Neither the United States Government nor the +University of California nor any of their employees, makes any warranty, +express or implied, or assumes any liability or responsibility for the +accuracy, completeness, or usefulness of any information, apparatus, product, +or process disclosed, or represented that its use would not infringe +privately-owned rights. Reference herein to any specific commercial products, +process, or service by trade name, trademark, manufacturer or otherwise does +not necessarily constitute or imply its endorsement, recommendation, or +favoring by the United States Government or the University of California. +The views and opinions of authors expressed herein do not necessarily state +or reflect those of the United States Government or the University of +California, and shall not be used for advertising or product endorsement +purposes. + + +NOTIFICATION OF COMMERCIAL USE + +Commercialization of this product is prohibited without notifying the +Department of Energy (DOE) or Lawrence Livermore National Laboratory (LLNL). + + +USE OF THIS PATCH + +This patch makes use of SLURM's srun command to launch all tasks. +IMPORTANT: In order to launch more than one task per node, shared +memory is used for communications. You must explicitly enable shared +memory when building MPICH with the following configure line: + ./configure --with-device=ch_p4 --with-comm=shared + +Applications must be rebuilt with this new library to function +with SLURM launch. 
The "--mpi=mpich1_p4" srun option MUST be +used to launch the tasks (it sets a bunch of environment variables +and launches only one task per node, the MPICH library launches +the other tasks on the node). Here is a sample execute line: + srun --mpi=mpich1_p4 [srun_options...] <progname> [options...] + + +IDENTIFICATION: UCRL-CODE-234229 + + + Index: mpid/ch_p4/p4/lib/p4_args.c =================================================================== --- mpid/ch_p4/p4/lib/p4_args.c (revision 11616) diff --git a/src/api/job_info.c b/src/api/job_info.c index a8b1ecf8637236a23f06ce4ffbc009d7228a8f8b..0649f88e6ff936ab896ee8d880349f4e5943185b 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -128,7 +128,7 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) char tmp1[128], tmp2[128]; char tmp_line[128]; char *ionodes = NULL; - uint16_t term_sig = 0; + uint16_t exit_status = 0, term_sig = 0; char *out = NULL; #ifdef HAVE_BG @@ -193,9 +193,11 @@ slurm_sprint_job_info ( job_info_t * job_ptr, int one_liner ) xstrcat(out, tmp_line); if (WIFSIGNALED(job_ptr->exit_code)) term_sig = WTERMSIG(job_ptr->exit_code); + else + exit_status = WEXITSTATUS(job_ptr->exit_code); snprintf(tmp_line, sizeof(tmp_line), "ExitCode=%u:%u", - WEXITSTATUS(job_ptr->exit_code), term_sig); + exit_status, term_sig); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); diff --git a/src/common/env.c b/src/common/env.c index f13ecf91fc6e919d93e3876e339884a2a98f7486..d56b609a48a29066989c03a48ae702f6693d6cef 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -294,7 +294,7 @@ int setup_env(env_t *env) if (env->cpus_on_node && setenvf(&env->env, "SLURM_CPUS_ON_NODE", "%d", env->cpus_on_node) ) { - error("Unable to set SLURM_CPUS_PER_TASK"); + error("Unable to set SLURM_CPUS_ON_NODE"); rc = SLURM_FAILURE; } diff --git a/src/plugins/jobacct/common/common_slurmstepd.c b/src/plugins/jobacct/common/common_slurmstepd.c index 
2426dcbd37a03802bf7dd1e1698506bb80a78aed..22b1222cf34b5796f4ca9a2944297323f81afdea 100644 --- a/src/plugins/jobacct/common/common_slurmstepd.c +++ b/src/plugins/jobacct/common/common_slurmstepd.c @@ -62,7 +62,7 @@ extern int common_set_proctrack_container_id(uint32_t id) info("Warning: jobacct: set_proctrack_container_id: " "cont_id is already set to %d you are setting it to %d", cont_id, id); - if(id <= 0) { + if((int)id <= 0) { error("jobacct: set_proctrack_container_id: " "I was given most likely an unset cont_id %d", id); diff --git a/src/plugins/mpi/mvapich/mvapich.c b/src/plugins/mpi/mvapich/mvapich.c index 1be8fef2ab085f9736869873ed4d7cfe9b68a6cd..96784b0b7c7a3c80d1bee5f288fe7d242bfb0b63 100644 --- a/src/plugins/mpi/mvapich/mvapich.c +++ b/src/plugins/mpi/mvapich/mvapich.c @@ -230,7 +230,7 @@ static int startup_timeout (mvapich_state_t *st) now = time (NULL); if (!st->start_time) - st->start_time = now; + return (-1); remaining = st->timeout - (now - st->start_time); @@ -282,19 +282,24 @@ static int mvapich_write_n (mvapich_state_t *st, struct mvapich_info *mvi, { int nleft = len; int n = 0; + unsigned char * p = buf; - while (nleft) { + while (nleft > 0) { /* Poll for write-activity */ if (mvapich_poll (st, mvi, 1) < 0) return (-1); - if ((n = fd_write_n (mvi->fd, buf, len)) < 0 && (errno != EINTR)) + if ((n = write (mvi->fd, p, nleft)) < 0) { + if (errno == EAGAIN || errno == EINTR) + continue; return (-1); + } nleft -= n; + p += n; } - return (n); + return (len - nleft); } static int mvapich_read_n (mvapich_state_t *st, struct mvapich_info *mvi, @@ -302,19 +307,27 @@ static int mvapich_read_n (mvapich_state_t *st, struct mvapich_info *mvi, { int nleft = len; int n = 0; + unsigned char * p = buf; - while (nleft) { + while (nleft > 0) { /* Poll for write-activity */ if (mvapich_poll (st, mvi, 0) < 0) return (-1); - if ((n = fd_read_n (mvi->fd, buf, len)) < 0 && (errno != EINTR)) + if ((n = read (mvi->fd, p, nleft)) < 0) { + if (errno == EAGAIN || errno == 
EINTR) + continue; + return (-1); + } + + if (n == 0) /* unexpected EOF */ return (-1); nleft -= n; + p += n; } - return (n); + return (len - nleft); } @@ -340,6 +353,8 @@ static int mvapich_abort_sends_rank (mvapich_state_t *st) static int mvapich_get_task_info (mvapich_state_t *st, struct mvapich_info *mvi) { + mvi->do_poll = 0; + if (mvapich_read_n (st, mvi, &mvi->addrlen, sizeof (int)) <= 0) return error ("mvapich: Unable to read addrlen for rank %d: %m", mvi->rank); @@ -365,8 +380,6 @@ static int mvapich_get_task_info (mvapich_state_t *st, mvi->rank); } - mvi->do_poll = 0; - return (0); } @@ -1133,6 +1146,10 @@ again: if (first) { mvapich_debug ("first task checked in"); do_timings (st); + /* + * Officially start timeout timer now. + */ + st->start_time = time(NULL); first = 0; } diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index ecdd9efb359114e2bfd37699ac0685bf7c7502ee..460dfb812b068e14f565c923ebf327d341da6e93 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1665,12 +1665,12 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, if (job_return_code == NO_VAL) { job_ptr->job_state = JOB_CANCELLED| job_comp_flag; job_ptr->requid = uid; - } else if (WEXITSTATUS(job_return_code)) { + } else if (WIFEXITED(job_return_code) && + WEXITSTATUS(job_return_code)) { job_ptr->job_state = JOB_FAILED | job_comp_flag; job_ptr->exit_code = job_return_code; job_ptr->state_reason = FAIL_EXIT_CODE; - } - else if (job_comp_flag && /* job was running */ + } else if (job_comp_flag && /* job was running */ (job_ptr->end_time < now)) { /* over time limit */ job_ptr->job_state = JOB_TIMEOUT | job_comp_flag; job_ptr->exit_code = MAX(job_ptr->exit_code, 1); diff --git a/src/slurmd/slurmstepd/mgr.c b/src/slurmd/slurmstepd/mgr.c index 48d256d5226f748b5eefd76af58a54c7ab2aa0ee..8861e9153667a8246a0e3067df86f73f7130834f 100644 --- a/src/slurmd/slurmstepd/mgr.c +++ b/src/slurmd/slurmstepd/mgr.c @@ -951,8 +951,6 @@ 
_fork_all_tasks(slurmd_job_t *job) error ("Unable to return to working directory"); } - jobacct_g_set_proctrack_container_id(job->cont_id); - for (i = 0; i < job->ntasks; i++) { /* * Put this task in the step process group @@ -981,6 +979,7 @@ _fork_all_tasks(slurmd_job_t *job) return SLURM_ERROR; } } + jobacct_g_set_proctrack_container_id(job->cont_id); /* * Now it's ok to unblock the tasks, so they may call exec. @@ -1696,6 +1695,9 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, return -1; } + if (slurm_container_create(job) != SLURM_SUCCESS) + error("slurm_container_create: %m"); + if ((cpid = fork()) < 0) { error ("executing %s: fork: %m", name); return -1; @@ -1726,6 +1728,8 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, exit(127); } + if (slurm_container_add(job, cpid) != SLURM_SUCCESS) + error("slurm_container_add: %m"); if (max_wait < 0) opt = 0; else @@ -1737,7 +1741,8 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, if (errno == EINTR) continue; error("waidpid: %m"); - return 0; + status = 0; + break; } else if (rc == 0) { sleep(1); if ((--max_wait) == 0) { @@ -1745,10 +1750,13 @@ _run_script_as_user(const char *name, const char *path, slurmd_job_t *job, opt = 0; } } else { - killpg(cpid, SIGKILL); /* kill children too */ - return status; + /* spawned process exited */ + break; } } - - /* NOTREACHED */ + /* Insure that all child processes get killed */ + killpg(cpid, SIGKILL); + slurm_container_signal(job->cont_id, SIGKILL); + + return status; } diff --git a/src/srun/opt.c b/src/srun/opt.c index 9c569c7157ba4f77e361010244c44e605a4270b8..e0188bef21b6c99b474c968bab35c7c81f7c8c63 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -2261,6 +2261,13 @@ static bool _opt_verify(void) } } else if (opt.nodes_set && opt.nprocs_set) { + /* + * Make sure in a non allocate situation that + * the number of max_nodes is <= number of tasks + */ + if (!opt.allocate && opt.nprocs < 
opt.max_nodes) + opt.max_nodes = opt.nprocs; + /* * make sure # of procs >= min_nodes */ @@ -2282,7 +2289,8 @@ static bool _opt_verify(void) host = hostlist_pop(hl); free(host); } - hostlist_ranged_string(hl, strlen(opt.nodelist)+1, + hostlist_ranged_string(hl, + strlen(opt.nodelist)+1, opt.nodelist); } } diff --git a/testsuite/expect/globals b/testsuite/expect/globals index 4d986fe38b4d76202e5eb05a229b525c3c61561d..0229e75001b8d3f8f33846df2052a0f9955cc486 100755 --- a/testsuite/expect/globals +++ b/testsuite/expect/globals @@ -349,6 +349,12 @@ proc wait_for_file { file_name } { for {set my_delay 0} {$my_delay <= $max_file_delay} {incr my_delay} { if [file exists $file_name] { # Add small delay for I/O buffering + for {} {$my_delay <= $max_file_delay} {incr my_delay} { + if {[file size $file_name] != 0} { + break + } + exec $bin_sleep 1 + } exec $bin_sleep 2 return 0 } diff --git a/testsuite/expect/test1.88 b/testsuite/expect/test1.88 index 7c59a988277c1b4429766336067856e25987452f..0421283b6cfb969c9dd6dbfed2c36ce16061d140 100755 --- a/testsuite/expect/test1.88 +++ b/testsuite/expect/test1.88 @@ -57,6 +57,10 @@ if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatable with front-end systems\n" exit 0 } +if {[test_aix] == 1} { + send_user "WARNING: Test is incompatible with AIX\n" + exit 0 +} # # Delete left-over program and rebuild it diff --git a/testsuite/expect/test11.5 b/testsuite/expect/test11.5 index 624831f5f06925c62939133f1d4360bab01bad38..a3efb7e5fbf0a39cc3e6a31496fd4ca8aebc087f 100755 --- a/testsuite/expect/test11.5 +++ b/testsuite/expect/test11.5 @@ -159,6 +159,13 @@ if {$matches != 1} { set exit_code 1 } +# Actual checkpoint on AIX only works for tasks launched using POE +if {[test_aix] == 1} { + send_user "WARNING: Further testing is incompatible with AIX\n" + cancel_job $job_id + exit $exit_code +} + # # Create a checkpoint, continue execution # diff --git a/testsuite/expect/test14.7 b/testsuite/expect/test14.7 index 
69f4b602b869f538912e965619f34c6ae3719af8..72c265d6759d5518a0d062d744cd0bdcbb66a9b2 100755 --- a/testsuite/expect/test14.7 +++ b/testsuite/expect/test14.7 @@ -130,6 +130,11 @@ if {[wait_for_file $file_err] == 0} { incr matches exp_continue } + -re "not found" { + send_user "These errors are expected, no worries\n" + incr matches + exp_continue + } eof { wait } diff --git a/testsuite/expect/test7.6 b/testsuite/expect/test7.6 index 0247b6b39cc52c961cbba8f277e8e7d3c0f3e46a..c4c6ffcfe9934805de05454fb9a80527a36230e3 100755 --- a/testsuite/expect/test7.6 +++ b/testsuite/expect/test7.6 @@ -54,13 +54,6 @@ set no_bulk "set issue_dgo false; dset TV::bulk_launch_enabled false" print_header $test_id -# -# Put desired SLURM install directory at head of search path for bulk launch -# command to work (runs "srun" without path) -# -set env(PATH) "$slurm_dir/bin:$env(PATH)" -send_user "\n $env(PATH)\n" - # # Test for existence of mpi compiler and totalview # @@ -84,6 +77,17 @@ if {[test_front_end] != 0} { send_user "\nWARNING: This test is incompatable with front-end systems\n" exit 0 } +if {[test_aix] == 1} { + send_user "WARNING: Test is incompatible with AIX\n" + exit 0 +} + +# +# Put desired SLURM install directory at head of search path for bulk launch +# command to work (runs "srun" without path) +# +set env(PATH) "$slurm_dir/bin:$env(PATH)" +send_user "\n $env(PATH)\n" # # Delete left-over program and rebuild it