diff --git a/NEWS b/NEWS index 5fd911fb9a822797f3ba66ae9be69c0536bf047b..fceb6e6b1439304a202cefad17b8b019c3023ea5 100644 --- a/NEWS +++ b/NEWS @@ -20,6 +20,9 @@ documents those changes that are of interest to users and admins. -- Reorder get config logic to avoid deadlock. -- Enforce QOS MaxCPUsMin limit when job submission contains no user-specified time limit. + -- EpilogSlurmctld pthread is passed required arguments rather than a pointer + to the job record, which under some conditions could be purged and result + in an invalid memory reference. * Changes in Slurm 2.5.7 ======================== diff --git a/src/slurmctld/job_scheduler.c b/src/slurmctld/job_scheduler.c index 1694a896eae1df12ae79ff1d9cb7a697a09965f2..ff77b95e68130bcf6ad5c9bddc9520e42aad32c1 100644 --- a/src/slurmctld/job_scheduler.c +++ b/src/slurmctld/job_scheduler.c @@ -83,6 +83,12 @@ #define _DEBUG 0 #define MAX_RETRIES 10 +typedef struct epilog_arg { + char *epilog_slurmctld; + uint32_t job_id; + char **my_env; +} epilog_arg_t; + static char ** _build_env(struct job_record *job_ptr); static void _depend_list_del(void *dep_ptr); static void _feature_list_delete(void *x); @@ -1911,6 +1917,7 @@ extern int epilog_slurmctld(struct job_record *job_ptr) int rc; pthread_t thread_id_epilog; pthread_attr_t thread_attr_epilog; + epilog_arg_t *epilog_arg; if ((slurmctld_conf.epilog_slurmctld == NULL) || (slurmctld_conf.epilog_slurmctld[0] == '\0')) @@ -1921,13 +1928,18 @@ extern int epilog_slurmctld(struct job_record *job_ptr) return errno; } + epilog_arg = xmalloc(sizeof(epilog_arg_t)); + epilog_arg->job_id = job_ptr->job_id; + epilog_arg->epilog_slurmctld = xstrdup(slurmctld_conf.epilog_slurmctld); + epilog_arg->my_env = _build_env(job_ptr); + slurm_attr_init(&thread_attr_epilog); pthread_attr_setdetachstate(&thread_attr_epilog, PTHREAD_CREATE_DETACHED); while (1) { rc = pthread_create(&thread_id_epilog, &thread_attr_epilog, - _run_epilog, (void *) job_ptr); + _run_epilog, (void *) epilog_arg); if (rc == 0) { slurm_attr_destroy(&thread_attr_epilog); return SLURM_SUCCESS; @@ -2005,21 +2017,13 @@ static char **_build_env(struct job_record *job_ptr) static void *_run_epilog(void *arg) { - struct job_record *job_ptr = (struct job_record *) arg; - uint32_t job_id; + epilog_arg_t *epilog_arg = (epilog_arg_t *) arg; pid_t cpid; int i, status, wait_rc; - char *argv[2], **my_env; - /* Locks: Read config, job */ - slurmctld_lock_t config_read_lock = { - READ_LOCK, READ_LOCK, NO_LOCK, NO_LOCK }; + char *argv[2]; - lock_slurmctld(config_read_lock); - argv[0] = xstrdup(slurmctld_conf.epilog_slurmctld); + argv[0] = epilog_arg->epilog_slurmctld; argv[1] = NULL; - my_env = _build_env(job_ptr); - job_id = job_ptr->job_id; - unlock_slurmctld(config_read_lock); if ((cpid = fork()) < 0) { error("epilog_slurmctld fork error: %m"); @@ -2031,7 +2035,7 @@ static void *_run_epilog(void *arg) #else setpgrp(); #endif - execve(argv[0], argv, my_env); + execve(argv[0], argv, epilog_arg->my_env); exit(127); } @@ -2049,14 +2053,18 @@ static void *_run_epilog(void *arg) } if (status != 0) { error("epilog_slurmctld job %u epilog exit status %u:%u", - job_id, WEXITSTATUS(status), WTERMSIG(status)); - } else - debug2("epilog_slurmctld job %u epilog completed", job_id); + epilog_arg->job_id, WEXITSTATUS(status), + WTERMSIG(status)); + } else { + debug2("epilog_slurmctld job %u epilog completed", + epilog_arg->job_id); + } - fini: xfree(argv[0]); - for (i=0; my_env[i]; i++) - xfree(my_env[i]); - xfree(my_env); + fini: xfree(epilog_arg->epilog_slurmctld); + for (i=0; epilog_arg->my_env[i]; i++) + xfree(epilog_arg->my_env[i]); + xfree(epilog_arg->my_env); + xfree(epilog_arg); return NULL; }