diff --git a/NEWS b/NEWS
index 9152c6b9349e22269154d73d20d96bdf0519c379..b7bf53a61fe150a54300b0c8b792a563b204bc60 100644
--- a/NEWS
+++ b/NEWS
@@ -75,6 +75,11 @@ documents those changes that are of interest to users and admins.
     the job's task count and maximum CPUs per node.
  -- If running jobacct_gather/none fix issue on unpacking step completion.
  -- Reservation with CoreCnt: Avoid possible invalid memory reference.
+ -- sjstat - Add man page when generating rpms.
+ -- Make sure GrpCPURunMins is added when creating a user, account or QOS with
+    sacctmgr.
+ -- Fix for invalid memory reference due to multiple free calls caused by
+    job arrays submitted to multiple partitions.
 
 * Changes in Slurm 2.6.1
 ========================
diff --git a/slurm.spec b/slurm.spec
index 745bc289500e63610686abdfd09ddeead883da1d..cfa1caf32b3cf285f330aaa5115b8ca9293d1078 100644
--- a/slurm.spec
+++ b/slurm.spec
@@ -563,6 +563,8 @@ rm -f ${RPM_BUILD_ROOT}%{_libdir}/slurm/switch_nrt.so
 # Build man pages that are generated directly by the tools
 rm -f $RPM_BUILD_ROOT/%{_mandir}/man1/sjobexitmod.1
 ${RPM_BUILD_ROOT}%{_bindir}/sjobexitmod --roff > $RPM_BUILD_ROOT/%{_mandir}/man1/sjobexitmod.1
+rm -f $RPM_BUILD_ROOT/%{_mandir}/man1/sjstat.1
+${RPM_BUILD_ROOT}%{_bindir}/sjstat --roff > $RPM_BUILD_ROOT/%{_mandir}/man1/sjstat.1
 
 # Build conditional file list for main package
 LIST=./slurm.files
@@ -747,6 +749,7 @@ rm -rf $RPM_BUILD_ROOT
 %config %{_sysconfdir}/cgroup/release_memory
 %config %{_sysconfdir}/slurm.epilog.clean
 %exclude %{_mandir}/man1/sjobexit*
+%exclude %{_mandir}/man1/sjstat*
 %if %{slurm_with blcr}
 %exclude %{_mandir}/man1/srun_cr*
 %exclude %{_bindir}/srun_cr
@@ -950,6 +953,7 @@ rm -rf $RPM_BUILD_ROOT
 %files sjstat
 %defattr(-,root,root)
 %{_bindir}/sjstat
+%{_mandir}/man1/sjstat*
 
 #############################################################################
 %if %{slurm_with pam}
diff --git a/src/common/plugstack.c b/src/common/plugstack.c
index 127514ad8eafc42e367e51bfbe6059f2539c652f..3fc2e60ae1b476c8278d2ac4db0583dde8caf098 100644
--- a/src/common/plugstack.c
+++ b/src/common/plugstack.c
@@ -1685,11 +1685,11 @@ static int tasks_execd (spank_t spank)
 }
 
 static spank_err_t
-global_to_local_id (stepd_step_rec_t *job, uint32_t gid, uint32_t *p2uint32)
+_global_to_local_id(stepd_step_rec_t *job, uint32_t gid, uint32_t *p2uint32)
 {
         int i;
         *p2uint32 = (uint32_t) -1;
-        if (gid >= job->ntasks)
+        if ((job == NULL) || (gid >= job->ntasks))
                 return (ESPANK_BAD_ARG);
         for (i = 0; i < job->node_tasks; i++) {
                 if (job->task[i]->gtid == gid) {
@@ -1928,8 +1928,13 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
         case S_JOB_SUPPLEMENTARY_GIDS:
                 p2gids = va_arg(vargs, gid_t **);
                 p2int = va_arg(vargs, int *);
-                *p2gids = slurmd_job->gids;
-                *p2int = slurmd_job->ngids;
+                if (slurmd_job) {
+                        *p2gids = slurmd_job->gids;
+                        *p2int = slurmd_job->ngids;
+                } else {
+                        *p2gids = NULL;
+                        *p2int = 0;
+                }
                 break;
         case S_JOB_ID:
                 p2uint32 = va_arg(vargs, uint32_t *);
@@ -1944,8 +1949,10 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
                 p2uint32 = va_arg(vargs, uint32_t *);
                 if (spank->stack->type == S_TYPE_LOCAL)
                         *p2uint32 = launcher_job->stepid;
-                else
+                else if (slurmd_job)
                         *p2uint32 = slurmd_job->stepid;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_JOB_NNODES:
                 p2uint32 = va_arg(vargs, uint32_t *);
@@ -1957,16 +1964,24 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
                                 *p2uint32 = 0;
                                 rc = ESPANK_ENV_NOEXIST;
                         }
-                } else
+                } else if (slurmd_job)
                         *p2uint32 = slurmd_job->nnodes;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_JOB_NODEID:
                 p2uint32 = va_arg(vargs, uint32_t *);
-                *p2uint32 = slurmd_job->nodeid;
+                if (slurmd_job)
+                        *p2uint32 = slurmd_job->nodeid;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_JOB_LOCAL_TASK_COUNT:
                 p2uint32 = va_arg(vargs, uint32_t *);
-                *p2uint32 = slurmd_job->node_tasks;
+                if (slurmd_job)
+                        *p2uint32 = slurmd_job->node_tasks;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_JOB_TOTAL_TASK_COUNT:
                 p2uint32 = va_arg(vargs, uint32_t *);
@@ -1978,16 +1993,24 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
                                 *p2uint32 = 0;
                                 rc = ESPANK_ENV_NOEXIST;
                         }
-                } else
+                } else if (slurmd_job)
                         *p2uint32 = slurmd_job->ntasks;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_JOB_NCPUS:
                 p2uint16 = va_arg(vargs, uint16_t *);
-                *p2uint16 = slurmd_job->cpus;
+                if (slurmd_job)
+                        *p2uint16 = slurmd_job->cpus;
+                else
+                        *p2uint16 = 0;
                 break;
         case S_STEP_CPUS_PER_TASK:
                 p2uint32 = va_arg(vargs, uint32_t *);
-                *p2uint32 = slurmd_job->cpus_per_task;
+                if (slurmd_job)
+                        *p2uint32 = slurmd_job->cpus_per_task;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_JOB_ARGV:
                 p2int = va_arg(vargs, int *);
@@ -1995,14 +2018,20 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
                 if (spank->stack->type == S_TYPE_LOCAL) {
                         *p2int = launcher_job->argc;
                         *p2argv = launcher_job->argv;
-                } else {
+                } else if (slurmd_job) {
                         *p2int = slurmd_job->argc;
                         *p2argv = slurmd_job->argv;
+                } else {
+                        *p2int = 0;
+                        *p2argv = NULL;
                 }
                 break;
         case S_JOB_ENV:
                 p2argv = va_arg(vargs, char ***);
-                *p2argv = slurmd_job->env;
+                if (slurmd_job)
+                        *p2argv = slurmd_job->env;
+                else
+                        *p2argv = NULL;
                 break;
         case S_TASK_ID:
                 p2int = va_arg(vargs, int *);
@@ -2067,7 +2096,7 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
                 p2uint32 = va_arg(vargs, uint32_t *);
                 *p2uint32 = (uint32_t) -1;
 
-                if ((uint32 <= slurmd_job->node_tasks) &&
+                if (slurmd_job && (uint32 <= slurmd_job->node_tasks) &&
                     slurmd_job->task && slurmd_job->task[uint32]) {
                         *p2uint32 = slurmd_job->task[uint32]->gtid;
                 } else
@@ -2076,23 +2105,35 @@ spank_err_t spank_get_item(spank_t spank, spank_item_t item, ...)
         case S_JOB_GLOBAL_TO_LOCAL_ID:
                 uint32 = va_arg(vargs, uint32_t);
                 p2uint32 = va_arg(vargs, uint32_t *);
-                rc = global_to_local_id (slurmd_job, uint32, p2uint32);
+                rc = _global_to_local_id (slurmd_job, uint32, p2uint32);
                 break;
         case S_JOB_ALLOC_CORES:
                 p2str = va_arg(vargs, char **);
-                *p2str = slurmd_job->job_alloc_cores;
+                if (slurmd_job)
+                        *p2str = slurmd_job->job_alloc_cores;
+                else
+                        *p2str = NULL;
                 break;
         case S_JOB_ALLOC_MEM:
                 p2uint32 = va_arg(vargs, uint32_t *);
-                *p2uint32 = slurmd_job->job_mem;
+                if (slurmd_job)
+                        *p2uint32 = slurmd_job->job_mem;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_STEP_ALLOC_CORES:
                 p2str = va_arg(vargs, char **);
-                *p2str = slurmd_job->step_alloc_cores;
+                if (slurmd_job)
+                        *p2str = slurmd_job->step_alloc_cores;
+                else
+                        *p2str = NULL;
                 break;
         case S_STEP_ALLOC_MEM:
                 p2uint32 = va_arg(vargs, uint32_t *);
-                *p2uint32 = slurmd_job->step_mem;
+                if (slurmd_job)
+                        *p2uint32 = slurmd_job->step_mem;
+                else
+                        *p2uint32 = 0;
                 break;
         case S_SLURM_VERSION:
                 p2vers = va_arg(vargs, char **);
diff --git a/src/common/proc_args.c b/src/common/proc_args.c
index eb9cbc98d3774aa56d30876faa157aca2729e3ed..f4ab279c78f11296da898031f46ce11634f394a9 100644
--- a/src/common/proc_args.c
+++ b/src/common/proc_args.c
@@ -574,7 +574,7 @@ bool verify_socket_core_thread_count(const char *arg, int *min_sockets,
 {
         bool tmp_val,ret_val;
         int i,j;
-        int max_sockets, max_cores, max_threads;
+        int max_sockets = 0, max_cores, max_threads;
         const char *cur_ptr = arg;
         char buf[3][48]; /* each can hold INT64_MAX - INT64_MAX */
         buf[0][0] = '\0';
diff --git a/src/common/slurm_cred.c b/src/common/slurm_cred.c
index 78f17633716c2e484430e4db653f8dd87fb477ad..e5d9a0c326d5f678da9d8bbd76bfded653da9890 100644
--- a/src/common/slurm_cred.c
+++ b/src/common/slurm_cred.c
@@ -2125,22 +2125,6 @@ sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx,
         return sbcast_cred;
 }
 
-/* Copy an sbcast credential created using create_sbcast_cred() or
- * unpack_sbcast_cred() */
-sbcast_cred_t *copy_sbcast_cred(sbcast_cred_t *sbcast_cred)
-{
-        sbcast_cred_t *rcred = NULL;
-
-        xassert(sbcast_cred);
-        rcred->ctime = sbcast_cred->ctime;
-        rcred->expiration = sbcast_cred->expiration;
-        rcred->jobid = sbcast_cred->jobid;
-        rcred->nodes = xstrdup(sbcast_cred->nodes);
-        rcred->siglen = sbcast_cred->siglen;
-        rcred->signature = xstrdup(sbcast_cred->signature);
-        return rcred;
-}
-
 /* Delete an sbcast credential created using create_sbcast_cred() or
  * unpack_sbcast_cred() */
 void delete_sbcast_cred(sbcast_cred_t *sbcast_cred)
diff --git a/src/common/slurm_cred.h b/src/common/slurm_cred.h
index 7937947d70620132c443c7dbbea37096875d3ae5..caa6c78d1e003a66ee42908a50af9cf9a6991f73 100644
--- a/src/common/slurm_cred.h
+++ b/src/common/slurm_cred.h
@@ -339,7 +339,6 @@ void slurm_cred_print(slurm_cred_t *cred);
 sbcast_cred_t *create_sbcast_cred(slurm_cred_ctx_t ctx,
                                   uint32_t job_id, char *nodes,
                                   time_t expiration);
-sbcast_cred_t *copy_sbcast_cred(sbcast_cred_t *sbcast_cred);
 void delete_sbcast_cred(sbcast_cred_t *sbcast_cred);
 int extract_sbcast_cred(slurm_cred_ctx_t ctx,
                         sbcast_cred_t *sbcast_cred, uint16_t block_no,
diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c
index 8466bd2b25d38dd9eb8fbe1017615eb5282b88e3..35a8fe765dccef3b61c82f75463e614371156d5a 100644
--- a/src/common/slurm_protocol_defs.c
+++ b/src/common/slurm_protocol_defs.c
@@ -393,6 +393,7 @@ extern void slurm_free_job_desc_msg(job_desc_msg_t * msg)
 
         if (msg) {
                 xfree(msg->account);
+                xfree(msg->acctg_freq);
                 xfree(msg->alloc_node);
                 if (msg->argv) {
                         for (i = 0; i < msg->argc; i++)
diff --git a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c
index a7c91e509179287900623eb4e5a8aacf2e286a23..72a6576dbca639491c25ec05737216982364c490 100644
--- a/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c
+++ b/src/plugins/acct_gather_energy/rapl/acct_gather_energy_rapl.c
@@ -236,7 +236,7 @@ static void _hardware(void)
 {
         char buf[1024];
         FILE *fd;
-        int cpu, pkg;
+        int cpu = 0, pkg = 0;
 
         if ((fd = fopen("/proc/cpuinfo", "r")) == 0)
                 fatal("RAPL: error on attempt to open /proc/cpuinfo");
diff --git a/src/plugins/mpi/pmi2/pmi1.c b/src/plugins/mpi/pmi2/pmi1.c
index e5a5d67511be359e34056d848db30c854cbd2acd..721903f45d1b4e77c6201a70f030bbf3e4554d0e 100644
--- a/src/plugins/mpi/pmi2/pmi1.c
+++ b/src/plugins/mpi/pmi2/pmi1.c
@@ -119,7 +119,7 @@ _handle_get_maxes(int fd, int lrank, client_req_t *req)
                            KVSNAMEMAX_KEY"=%d " KEYLENMAX_KEY"=%d "
                            VALLENMAX_KEY"=%d\n",
                            rc, MAXKVSNAME, MAXKEYLEN, MAXVALLEN);
-        rc = client_resp_send(resp, fd);
+        (void) client_resp_send(resp, fd);
         client_resp_free(resp);
 
         debug3("mpi/pmi2: out _handle_get_maxes");
@@ -138,7 +138,7 @@ _handle_get_universe_size(int fd, int lrank, client_req_t *req)
         client_resp_append(resp, CMD_KEY"="UNIVSIZE_CMD" " RC_KEY"=%d "
                            SIZE_KEY"=%d\n", rc, job_info.ntasks);
-        rc = client_resp_send(resp, fd);
+        (void) client_resp_send(resp, fd);
         client_resp_free(resp);
 
         debug3("mpi/pmi2: out _handle_get_universe_size");
@@ -162,7 +162,7 @@ _handle_get_appnum(int fd, int lrank, client_req_t *req)
          */
         client_resp_append(resp, CMD_KEY"="APPNUM_CMD" " RC_KEY"=%d "
                            APPNUM_KEY"=-1\n", rc);
-        rc = client_resp_send(resp, fd);
+        (void) client_resp_send(resp, fd);
         client_resp_free(resp);
 
         debug3("mpi/pmi2: out _handle_get_appnum");
@@ -582,8 +582,10 @@ _handle_pmi1_mcmd_buf(int fd, int lrank, int buf_size, int buf_len, char **pbuf)
         tmp_ptr = NULL;
         while (tmp_buf[0] != '\0') {
                 tmp_ptr = strstr(tmp_buf, ENDCMD_KEY"\n");
-                if ( tmp_ptr == NULL) {
+                if (tmp_ptr == NULL) {
                         error("mpi/pmi2: this is impossible");
+                        rc = SLURM_ERROR;
+                        break;
                 }
                 *tmp_ptr = '\0';
                 n = tmp_ptr - tmp_buf;
diff --git a/src/plugins/sched/wiki/get_nodes.c b/src/plugins/sched/wiki/get_nodes.c
index 4acf1385505b333f83e87a33a3fdcba276636998..57a1e6c73b293598e8b1202249272f92a60a26d9 100644
--- a/src/plugins/sched/wiki/get_nodes.c
+++ b/src/plugins/sched/wiki/get_nodes.c
@@ -215,7 +215,7 @@ static char * _dump_node(struct node_record *node_ptr, time_t update_time)
         if (update_time > 0)
                 return buf;
 
-        if (slurmctld_conf.fast_schedule) {
+        if (slurmctld_conf.fast_schedule && node_ptr->config_ptr) {
                 /* config from slurm.conf */
                 snprintf(tmp, sizeof(tmp),
                          "CMEMORY=%u;CDISK=%u;CPROC=%u;",
diff --git a/src/plugins/sched/wiki2/get_nodes.c b/src/plugins/sched/wiki2/get_nodes.c
index 9bebe8e610c1b2fa8fa1d49082bb894c8f4bef50..c62d263b9ed7bfc7a1c5f0347fbf4044bfc2aae6 100644
--- a/src/plugins/sched/wiki2/get_nodes.c
+++ b/src/plugins/sched/wiki2/get_nodes.c
@@ -401,7 +401,7 @@ static char * _dump_node(struct node_record *node_ptr, hostlist_t hl,
         if (update_time > 0)
                 return buf;
 
-        if (slurmctld_conf.fast_schedule) {
+        if (slurmctld_conf.fast_schedule && node_ptr->config_ptr) {
                 /* config from slurm.conf */
                 snprintf(tmp, sizeof(tmp),
                          "CMEMORY=%u;CDISK=%u;CPROC=%u;",
diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c
index 0c701d90dc18df66149172ece1c84435099d0a34..9c8d197cbc028903206869e841b43e35d22bf1cc 100644
--- a/src/plugins/select/cons_res/job_test.c
+++ b/src/plugins/select/cons_res/job_test.c
@@ -320,7 +320,6 @@ uint16_t _allocate_sockets(struct job_record *job_ptr, bitstr_t *core_map,
 
         if (cpus_per_task < 2) {
                 avail_cpus = num_tasks;
-                cps = num_tasks;
         } else {
                 j = avail_cpus / cpus_per_task;
                 num_tasks = MIN(num_tasks, j);
@@ -2474,7 +2473,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap,
          * avail_cores = static core_bitmap of all available cores
          */
 
-        if (jp_ptr->row == NULL) {
+        if (!jp_ptr || !jp_ptr->row) {
                 /* there's no existing jobs in this partition, so place
                  * the job in avail_cores. FIXME: still need a good
                  * placement algorithm here that optimizes "job overlap"
diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c
index 34bf8ae9d1f530f88be445f66eb60bfa271ec98b..bc6f08e180c1c554bcb67acea022e9b6d1d94520 100644
--- a/src/plugins/select/cons_res/select_cons_res.c
+++ b/src/plugins/select/cons_res/select_cons_res.c
@@ -2004,7 +2004,7 @@ extern int select_p_job_test(struct job_record *job_ptr, bitstr_t * bitmap,
 {
         int rc = EINVAL;
         uint16_t job_node_req;
-        bool debug_cpu_bind = false, debug_check = false;
+        static bool debug_cpu_bind = false, debug_check = false;
 
         xassert(bitmap);
 
@@ -2783,7 +2783,7 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
         bitstr_t *avail_nodes_bitmap = NULL; /* nodes on any switch */
         bitstr_t *sp_avail_bitmap;
-        int rem_nodes, rem_cores; /* remaining resources desired */
+        int rem_nodes, rem_cores = 0; /* remaining resources desired */
         int i, j;
         int best_fit_inx, first, last;
         int best_fit_nodes;
@@ -2807,12 +2807,12 @@ extern bitstr_t * select_p_resv_test(bitstr_t *avail_bitmap, uint32_t node_cnt,
                 *core_bitmap = _make_core_bitmap_filtered(avail_bitmap, 0);
 
         rem_nodes = node_cnt;
-        rem_cores = core_cnt[0]; /* Assuming symmetric cluster */
 
-        if (core_cnt)
+        if (core_cnt) {
+                rem_cores = core_cnt[0];
                 cores_per_node = core_cnt[0] / MAX(node_cnt, 1);
-        else if (cr_node_num_cores)
+        } else if (cr_node_num_cores)
                 cores_per_node = cr_node_num_cores[0];
         else
                 cores_per_node = 1;
diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c
index e7856089e44fcdeb5ef751824d2e49741453ae1b..6958162ec57f304952f68ce09c7b6e5e4e81266c 100644
--- a/src/plugins/select/linear/select_linear.c
+++ b/src/plugins/select/linear/select_linear.c
@@ -1816,9 +1816,10 @@ static int _decr_node_job_cnt(int node_inx, struct job_record *job_ptr,
 {
         struct node_record *node_ptr = node_record_table_ptr + node_inx;
         struct part_cr_record *part_cr_ptr;
-        bool exclusive, is_job_running;
+        bool exclusive = false, is_job_running;
 
-        exclusive = (job_ptr->details->shared == 0);
+        if (job_ptr->details)
+                exclusive = (job_ptr->details->shared == 0);
         if (exclusive) {
                 if (cr_ptr->nodes[node_inx].exclusive_cnt)
                         cr_ptr->nodes[node_inx].exclusive_cnt--;
@@ -2260,7 +2261,10 @@ static void _init_node_cr(void)
                 if (job_resrcs_ptr->node_bitmap == NULL)
                         continue;
 
-                exclusive = (job_ptr->details->shared == 0);
+                if (job_ptr->details)
+                        exclusive = (job_ptr->details->shared == 0);
+                else
+                        exclusive = 0;
                 node_offset = -1;
                 i_first = bit_ffs(job_resrcs_ptr->node_bitmap);
                 i_last = bit_fls(job_resrcs_ptr->node_bitmap);
diff --git a/src/plugins/select/serial/job_test.c b/src/plugins/select/serial/job_test.c
index 136213ac031fda8853758e6ca805ec4cd3069cbd..0f84e20eeb5c28de801900266c36fffcce2792e3 100644
--- a/src/plugins/select/serial/job_test.c
+++ b/src/plugins/select/serial/job_test.c
@@ -747,7 +747,7 @@ extern int cr_job_test(struct job_record *job_ptr, bitstr_t *bitmap, int mode,
          * avail_cores = static core_bitmap of all available cores
          */
 
-        if (jp_ptr->row == NULL) {
+        if (!jp_ptr || !jp_ptr->row) {
                 /* there's no existing jobs in this partition, so place
                  * the job in avail_cores. FIXME: still need a good
                  * placement algorithm here that optimizes "job overlap"
diff --git a/src/sacctmgr/account_functions.c b/src/sacctmgr/account_functions.c
index 6500afdbcbb1a17b25b2ce1b75a0ef7b5fa945cb..39ec6bb070561661f83bfad43f73265ef0853539 100644
--- a/src/sacctmgr/account_functions.c
+++ b/src/sacctmgr/account_functions.c
@@ -600,6 +600,7 @@ extern int sacctmgr_add_account(int argc, char *argv[])
                 assoc->shares_raw = start_assoc->shares_raw;
 
                 assoc->grp_cpu_mins = start_assoc->grp_cpu_mins;
+                assoc->grp_cpu_run_mins = start_assoc->grp_cpu_run_mins;
                 assoc->grp_cpus = start_assoc->grp_cpus;
                 assoc->grp_jobs = start_assoc->grp_jobs;
                 assoc->grp_mem = start_assoc->grp_mem;
diff --git a/src/sacctmgr/qos_functions.c b/src/sacctmgr/qos_functions.c
index 0e682625523dc7e5ef847553f6d8afca76dd129b..16f97e9f7816d5af861200e93f0548d1c4af2680 100644
--- a/src/sacctmgr/qos_functions.c
+++ b/src/sacctmgr/qos_functions.c
@@ -257,6 +257,8 @@ static int _set_rec(int *start, int argc, char *argv[],
                         slurm_addto_char_list(name_list, argv[i]+end);
                 } else if (!strncasecmp (argv[i], "Description",
                                          MAX(command_len, 1))) {
+                        if (!qos)
+                                continue;
                         if (!qos->description)
                                 qos->description =
                                         strip_quotes(argv[i]+end, NULL, 1);
@@ -629,6 +631,7 @@ extern int sacctmgr_add_qos(int argc, char *argv[])
                 qos->flags = start_qos->flags;
                 qos->grace_time = start_qos->grace_time;
                 qos->grp_cpu_mins = start_qos->grp_cpu_mins;
+                qos->grp_cpu_run_mins = start_qos->grp_cpu_run_mins;
                 qos->grp_cpus = start_qos->grp_cpus;
                 qos->grp_jobs = start_qos->grp_jobs;
                 qos->grp_mem = start_qos->grp_mem;
diff --git a/src/sacctmgr/user_functions.c b/src/sacctmgr/user_functions.c
index ba69455d79902bbe82230e490b43e5dc3671f929..198e3b4bf807f7886cd72756f77ad6928a7df3d0 100644
--- a/src/sacctmgr/user_functions.c
+++ b/src/sacctmgr/user_functions.c
@@ -1063,6 +1063,8 @@ extern int sacctmgr_add_user(int argc, char *argv[])
 
                         assoc->grp_cpu_mins = start_assoc.grp_cpu_mins;
+                        assoc->grp_cpu_run_mins =
+                                start_assoc.grp_cpu_run_mins;
                         assoc->grp_cpus = start_assoc.grp_cpus;
                         assoc->grp_jobs = start_assoc.grp_jobs;
                         assoc->grp_mem = start_assoc.grp_mem;
@@ -1132,6 +1134,8 @@ extern int sacctmgr_add_user(int argc, char *argv[])
 
                         assoc->grp_cpu_mins = start_assoc.grp_cpu_mins;
+                        assoc->grp_cpu_run_mins =
+                                start_assoc.grp_cpu_run_mins;
                         assoc->grp_cpus = start_assoc.grp_cpus;
                         assoc->grp_jobs = start_assoc.grp_jobs;
                         assoc->grp_mem = start_assoc.grp_mem;
diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c
index ded420cadead48e09adc79ee5b951dbd378f1236..e4588ae3167718135c7161606262eff73705e90f 100644
--- a/src/slurmctld/job_mgr.c
+++ b/src/slurmctld/job_mgr.c
@@ -270,6 +270,7 @@ void delete_job_details(struct job_record *job_entry)
         if (IS_JOB_FINISHED(job_entry))
                 _delete_job_desc_files(job_entry->job_id);
 
+        xfree(job_entry->details->acctg_freq);
         for (i=0; i<job_entry->details->argc; i++)
                 xfree(job_entry->details->argv[i]);
         xfree(job_entry->details->argv);
@@ -2789,13 +2790,15 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr)
         struct job_record *job_ptr_new = NULL, *save_job_next;
         struct job_details *job_details, *details_new, *save_details;
         uint32_t save_job_id;
+        priority_factors_object_t *save_prio_factors;
+        List save_step_list;
         int error_code = SLURM_SUCCESS;
         int i;
 
         job_ptr_new = create_job_record(&error_code);
         if (!job_ptr_new) /* MaxJobCount checked when job array submitted */
                 fatal("job array create_job_record error");
-        if (!job_ptr_new || (error_code != SLURM_SUCCESS))
+        if (error_code != SLURM_SUCCESS)
                 return job_ptr_new;
 
         /* Set job-specific ID and hash table */
@@ -2808,10 +2811,15 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr)
         save_job_id = job_ptr_new->job_id;
         save_job_next = job_ptr_new->job_next;
         save_details = job_ptr_new->details;
+        save_prio_factors = job_ptr_new->prio_factors;
+        save_step_list = job_ptr_new->step_list;
         memcpy(job_ptr_new, job_ptr, sizeof(struct job_record));
         job_ptr_new->job_id = save_job_id;
         job_ptr_new->job_next = save_job_next;
         job_ptr_new->details = save_details;
+        job_ptr_new->prio_factors = save_prio_factors;
+        job_ptr_new->step_list = save_step_list;
+
         job_ptr_new->account = xstrdup(job_ptr->account);
         job_ptr_new->alias_list = xstrdup(job_ptr->alias_list);
         job_ptr_new->alloc_node = xstrdup(job_ptr->alloc_node);
@@ -2848,12 +2856,11 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr)
         job_ptr_new->node_bitmap_cg = bit_copy(job_ptr->node_bitmap_cg);
         job_ptr_new->nodes_completing = xstrdup(job_ptr->nodes_completing);
         job_ptr_new->partition = xstrdup(job_ptr->partition);
-        job_ptr_new->profile = job_ptr->profile;
         job_ptr_new->part_ptr_list = part_list_copy(job_ptr->part_ptr_list);
-        if (job_ptr->prio_factors) {
-                i = sizeof(priority_factors_object_t);
-                job_ptr_new->prio_factors = xmalloc(i);
-                memcpy(job_ptr_new->prio_factors, job_ptr->prio_factors, i);
+        if (job_ptr->part_ptr_list) {
+                i = list_count(job_ptr->part_ptr_list) * sizeof(uint32_t);
+                job_ptr_new->priority_array = xmalloc(i);
+                memcpy(job_ptr_new->priority_array, job_ptr->priority_array, i);
         }
         job_ptr_new->resv_name = xstrdup(job_ptr->resv_name);
         job_ptr_new->resp_host = xstrdup(job_ptr->resp_host);
@@ -2871,12 +2878,12 @@ struct job_record *_job_rec_copy(struct job_record *job_ptr)
                 }
         }
         job_ptr_new->state_desc = xstrdup(job_ptr->state_desc);
-        job_ptr_new->step_list = list_create(NULL);
         job_ptr_new->wckey = xstrdup(job_ptr->wckey);
 
         job_details = job_ptr->details;
         details_new = job_ptr_new->details;
         memcpy(details_new, job_details, sizeof(struct job_details));
+        details_new->acctg_freq = xstrdup(job_details->acctg_freq);
         if (job_details->argc) {
                 details_new->argv = xmalloc(sizeof(char *) *
                                             (job_details->argc + 1));
diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c
index 93f81a9ee52b9723cb11dbb7771fadaf11c423d4..17bc2067109c54276f0bada22d247219d9e58f81 100644
--- a/src/slurmctld/proc_req.c
+++ b/src/slurmctld/proc_req.c
@@ -840,7 +840,7 @@ static void _slurm_rpc_allocate_resources(slurm_msg_t * msg)
         int immediate = job_desc_msg->immediate;
         bool do_unlock = false;
         bool job_waiting = false;
-        struct job_record *job_ptr;
+        struct job_record *job_ptr = NULL;
         uint16_t port; /* dummy value */
         slurm_addr_t resp_addr;
         char *err_msg = NULL;
diff --git a/src/slurmd/slurmstepd/io.c b/src/slurmd/slurmstepd/io.c
index 520a465c284c80e4b2ba32b59452539a894f77d9..20d900e7eef7c7a7ac68e3d583caff986a272826 100644
--- a/src/slurmd/slurmstepd/io.c
+++ b/src/slurmd/slurmstepd/io.c
@@ -555,7 +555,7 @@ _local_file_write(eio_obj_t *obj, List objs)
         if (!header_tmp_buf)
                 fatal("Failure to allocate memory for a message header");
         io_hdr_unpack(&header, header_tmp_buf);
-        header_tmp_buf->head = NULL;
+        header_tmp_buf->head = NULL; /* CLANG false positive bug here */
         free_buf(header_tmp_buf);
 
         /* A zero-length message indicates the end of a stream from one
@@ -1269,7 +1269,7 @@ _build_connection_okay_message(stepd_step_rec_t *job)
         msg->ref_count = 0; /* make certain it is initialized */
 
         /* free the Buf packbuf, but not the memory to which it points */
-        packbuf->head = NULL;
+        packbuf->head = NULL; /* CLANG false positive bug here */
         free_buf(packbuf);
 
         return msg;
@@ -1753,7 +1753,7 @@ _send_eof_msg(struct task_read_info *out)
         msg->ref_count = 0; /* make certain it is initialized */
 
         /* free the Buf packbuf, but not the memory to which it points */
-        packbuf->head = NULL;
+        packbuf->head = NULL; /* CLANG false positive bug here */
         free_buf(packbuf);
 
         /* Add eof message to the msg_queue of all clients */
@@ -1846,7 +1846,7 @@ _task_build_message(struct task_read_info *out, stepd_step_rec_t *job, cbuf_t cb
         msg->ref_count = 0; /* make certain it is initialized */
 
         /* free the Buf packbuf, but not the memory to which it points */
-        packbuf->head = NULL;
+        packbuf->head = NULL; /* CLANG false positive bug here */
         free_buf(packbuf);
 
         debug4("Leaving _task_build_message");
diff --git a/src/srun/libsrun/allocate.c b/src/srun/libsrun/allocate.c
index 82cff17d2b13dcafaa54ea34a54e3308f25d58f4..8aa1cdaf440797ea92836e7a884fe252e2d27cf1 100644
--- a/src/srun/libsrun/allocate.c
+++ b/src/srun/libsrun/allocate.c
@@ -478,10 +478,11 @@ allocate_nodes(bool handle_signals)
         return resp;
 
 relinquish:
-
-        slurm_free_resource_allocation_response_msg(resp);
-        if (!destroy_job)
-                slurm_complete_job(resp->job_id, 1);
+        if (resp) {
+                if (!destroy_job)
+                        slurm_complete_job(resp->job_id, 1);
+                slurm_free_resource_allocation_response_msg(resp);
+        }
         exit(error_exit);
         return NULL;
 }
diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c
index e3bf250bf285cdf4e1698a1e93470d00494d84f0..3f98678b46fa81ebf56c212197580643e1614d4f 100644
--- a/src/srun/libsrun/opt.c
+++ b/src/srun/libsrun/opt.c
@@ -1700,7 +1700,7 @@ static void _opt_args(int argc, char **argv)
         xassert((command_pos + command_args) <= opt.argc);
 
         for (i = command_pos; i < opt.argc; i++) {
-                if (!rest[i-command_pos])
+                if (!rest || !rest[i-command_pos])
                         break;
                 opt.argv[i] = xstrdup(rest[i-command_pos]);
         }