diff --git a/NEWS b/NEWS index 0271c2d1c5ba909b20aaf8b4988344c4a52c5143..0df7ead7bc8ad2b6217da804231817a5b927840e 100644 --- a/NEWS +++ b/NEWS @@ -218,7 +218,10 @@ documents those changes that are of interest to users and admins. are created in SLURM tables for future use without a reboot of the SLURM daemons, but are not reported by any SLURM commands or APIs. -* Changes in SLURM 1.3.14 +* Changes in SLURM 1.3.16 +========================= + +* Changes in SLURM 1.3.15 ========================= -- Fix bug in squeue command with sort on job name ("-S j" option) for jobs that lack a name. Previously generated an invalid memory reference. @@ -260,15 +263,20 @@ documents those changes that are of interest to users and admins. accepted but held until nodes are back up. -- Fix in accounting so if any nodes are removed from the system when they were previously down will be recorded correctly. - -- For sched/wiki2 (Moab), add flag to note if job is restartable. + -- For sched/wiki2 (Moab), add flag to note if job is restartable and + prevent deadlock if job requeue fails. -- Modify squeue to return non-zero exit code on failure. Patch from Par Andersson (NSC). -- Correct logic in select/cons_res to allocate a job the maximum node count from a range rather than minimum (e.g. "sbatch -N1-4 my.sh"). -- In accounting_storage/filetxt and accounting_storage/pgsql fix possible invalid memory reference when a job lacks a name. + +* Changes in SLURM 1.3.14 +========================= -- SECURITY BUG: Fix in sbcast logic that permits users to write files based - upon supplimental groups of the slurmd daemon. + upon supplemental groups of the slurmd daemon. Similar logic for event + triggers if slurmctld is run as user root (not typical). 
* Changes in SLURM 1.3.13 ========================= diff --git a/src/plugins/sched/wiki2/job_requeue.c b/src/plugins/sched/wiki2/job_requeue.c index 7829820f3db151ba057f807fdb83495c5f1971c4..539aa994244875d1cee6e9dc5fec5b6f3d0000f3 100644 --- a/src/plugins/sched/wiki2/job_requeue.c +++ b/src/plugins/sched/wiki2/job_requeue.c @@ -45,6 +45,7 @@ extern int job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg) { char *arg_ptr, *tmp_char; uint32_t jobid; + struct job_record *job_ptr; static char reply_msg[128]; int slurm_rc; /* Write lock on job and node info */ @@ -68,24 +69,23 @@ extern int job_requeue_wiki(char *cmd_ptr, int *err_code, char **err_msg) lock_slurmctld(job_write_lock); slurm_rc = job_requeue(0, jobid, -1); - if (slurm_rc == SLURM_SUCCESS) { - /* We need to clear the required node list here. - * If the job was submitted with srun and a - * required node list, it gets lost here. */ - struct job_record *job_ptr; - job_ptr = find_job_record(jobid); - if (job_ptr && job_ptr->details) { - xfree(job_ptr->details->req_nodes); - FREE_NULL_BITMAP(job_ptr->details-> - req_node_bitmap); - } - info("wiki: requeued job %u", jobid); - } else { + if (slurm_rc != SLURM_SUCCESS) { + unlock_slurmctld(job_write_lock); *err_code = -700; *err_msg = slurm_strerror(slurm_rc); error("wiki: Failed to requeue job %u (%m)", jobid); return -1; } + + /* We need to clear the required node list here. + * If the job was submitted with srun and a + * required node list, it gets lost here. 
*/ + job_ptr = find_job_record(jobid); + if (job_ptr && job_ptr->details) { + xfree(job_ptr->details->req_nodes); + FREE_NULL_BITMAP(job_ptr->details->req_node_bitmap); + } + info("wiki: requeued job %u", jobid); unlock_slurmctld(job_write_lock); snprintf(reply_msg, sizeof(reply_msg), "job %u requeued successfully", jobid); diff --git a/src/slurmctld/trigger_mgr.c b/src/slurmctld/trigger_mgr.c index 365ff3af0bd7dd9ddfdebbc3e6167b2e0cae7b0a..b92bbb98b785c51622a7d46fc3a8b7ad0aea0239 100644 --- a/src/slurmctld/trigger_mgr.c +++ b/src/slurmctld/trigger_mgr.c @@ -981,9 +981,18 @@ static void _trigger_run_program(trig_mgr_info_t *trig_in) setpgrp(); #endif setsid(); - setuid(uid); - setgid(gid); - initgroups(user_name, -1); + if (initgroups(user_name, gid) == -1) { /* set groups, then gid, then uid: setuid() must come last */ + error("trigger: initgroups: %m"); + exit(1); + } + if (setgid(gid) == -1) { + error("trigger: setgid: %m"); + exit(1); + } + if (setuid(uid) == -1) { + error("trigger: setuid: %m"); + exit(1); + } execl(program, arg0, arg1, NULL); exit(1); } else