diff --git a/NEWS b/NEWS index b33eb32e762227c3f839ea5f1616894369a88d79..85a97e2756202d52e3fff817e11c0407e3ffee0d 100644 --- a/NEWS +++ b/NEWS @@ -264,6 +264,9 @@ documents those changes that are of interest to users and administrators. -- Make full node reservations display correctly the core count instead of cpu count. -- Preserve original errno on execve() failure in task plugin. + -- Add SLURM_JOB_NAME env variable to an salloc's environment. + -- Overwrite SLURM_JOB_NAME in an srun when it gets an allocation. + -- Make sure each job has a wckey if that is something that is tracked. * Changes in Slurm 14.11.6 ========================== diff --git a/src/common/env.c b/src/common/env.c index c0148865ce36b76dd177934c9ca54b12d131696c..3495c618617df51180387d565623701662044d31 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -670,6 +670,13 @@ int setup_env(env_t *env, bool preserve_env) } } + if (env->job_name) { + if (setenvf(&env->env, "SLURM_JOB_NAME", "%s", env->job_name)) { + error("Unable to set SLURM_JOB_NAME environment"); + rc = SLURM_FAILURE; + } + } + if (!(cluster_flags & CLUSTER_FLAG_BG) && !(cluster_flags & CLUSTER_FLAG_CRAYXT)) { /* These aren't relavant to a system not using Slurm @@ -960,6 +967,7 @@ extern char *uint32_compressed_to_str(uint32_t array_len, * * Sets the variables: * SLURM_JOB_ID + * SLURM_JOB_NAME * SLURM_JOB_NUM_NODES * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE @@ -1000,6 +1008,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, } env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); + env_array_overwrite_fmt(dest, "SLURM_JOB_NAME", "%s", desc->name); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", node_cnt); env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", alloc->node_list); diff --git a/src/common/env.h b/src/common/env.h index a9a4fd541fc0d5c4be90cfa599c69137efb2d3a5..642f38e70cb8093e91f686fd73080a58a5a62bcf 100644 --- a/src/common/env.h +++ 
b/src/common/env.h @@ -62,6 +62,7 @@ typedef struct env_options { uint16_t comm_port; /* srun's communication port */ slurm_addr_t *cli; /* launch node address */ slurm_addr_t *self; + char *job_name; /* assigned job name */ int jobid; /* assigned job id */ int stepid; /* assigned step id */ int procid; /* global task id (across nodes) */ diff --git a/src/plugins/accounting_storage/mysql/as_mysql_job.c b/src/plugins/accounting_storage/mysql/as_mysql_job.c index 2b1011562bfdbc9bb0e6ffc05f5f74903cc8adf4..e143a970eed0d57c3e61b6c1ccf07f41c459226a 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_job.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_job.c @@ -436,10 +436,8 @@ no_rollup_change: #endif } - /* If there is a start_time get the wckeyid. If the job is - * cancelled before the job starts we also want to grab it. */ - if (job_ptr->assoc_id - && (job_ptr->start_time || IS_JOB_CANCELLED(job_ptr))) + /* Grab the wckey once to make sure it is placed. */ + if (job_ptr->assoc_id && (!job_ptr->db_index || job_ptr->wckey)) wckeyid = _get_wckeyid(mysql_conn, &job_ptr->wckey, job_ptr->user_id, mysql_conn->cluster_name, @@ -462,13 +460,15 @@ no_rollup_change: query = xstrdup_printf( "insert into \"%s_%s\" " "(id_job, id_array_job, id_array_task, " - "id_assoc, id_qos, id_wckey, id_user, " + "id_assoc, id_qos, id_user, " "id_group, nodelist, id_resv, timelimit, " "time_eligible, time_submit, time_start, " "job_name, track_steps, state, priority, cpus_req, " "nodes_alloc, mem_req", mysql_conn->cluster_name, job_table); + if (wckeyid) + xstrcat(query, ", id_wckey"); if (job_ptr->account) xstrcat(query, ", account"); if (partition) @@ -493,12 +493,12 @@ no_rollup_change: xstrcat(query, ", tres_alloc"); xstrfmtcat(query, - ") values (%u, %u, %u, %u, %u, %u, %u, %u, " + ") values (%u, %u, %u, %u, %u, %u, %u, " "'%s', %u, %u, %ld, %ld, %ld, " "'%s', %u, %u, %u, %u, %u, %u", job_ptr->job_id, job_ptr->array_job_id, job_ptr->array_task_id, job_ptr->assoc_id, - 
job_ptr->qos_id, wckeyid, + job_ptr->qos_id, job_ptr->user_id, job_ptr->group_id, nodes, job_ptr->resv_id, job_ptr->time_limit, begin_time, submit_time, start_time, @@ -507,6 +507,8 @@ no_rollup_change: node_cnt, job_ptr->details->pn_min_memory); + if (wckeyid) + xstrfmtcat(query, ", %u", wckeyid); if (job_ptr->account) xstrfmtcat(query, ", '%s'", job_ptr->account); if (partition) @@ -535,7 +537,7 @@ no_rollup_change: xstrfmtcat(query, ") on duplicate key update " "job_db_inx=LAST_INSERT_ID(job_db_inx), " - "id_wckey=%u, id_user=%u, id_group=%u, " + "id_user=%u, id_group=%u, " "nodelist='%s', id_resv=%u, timelimit=%u, " "time_submit=%ld, time_eligible=%ld, " "time_start=%ld, " @@ -543,7 +545,7 @@ no_rollup_change: "state=greatest(state, %u), priority=%u, " "cpus_req=%u, nodes_alloc=%u, " "mem_req=%u, id_array_job=%u, id_array_task=%u", - wckeyid, job_ptr->user_id, job_ptr->group_id, nodes, + job_ptr->user_id, job_ptr->group_id, nodes, job_ptr->resv_id, job_ptr->time_limit, submit_time, begin_time, start_time, jname, track_steps, job_ptr->qos_id, job_state, @@ -553,6 +555,8 @@ no_rollup_change: job_ptr->array_job_id, job_ptr->array_task_id); + if (wckeyid) + xstrfmtcat(query, ", id_wckey=%u", wckeyid); if (job_ptr->account) xstrfmtcat(query, ", account='%s'", job_ptr->account); if (partition) @@ -603,6 +607,8 @@ no_rollup_change: mysql_conn->cluster_name, job_table, nodes); + if (wckeyid) + xstrfmtcat(query, "id_wckey=%u, ", wckeyid); if (job_ptr->account) xstrfmtcat(query, "account='%s', ", job_ptr->account); if (partition) @@ -634,13 +640,13 @@ no_rollup_change: xstrfmtcat(query, "time_start=%ld, job_name='%s', state=%u, " "nodes_alloc=%u, id_qos=%u, " - "id_assoc=%u, id_wckey=%u, id_resv=%u, " + "id_assoc=%u, id_resv=%u, " "timelimit=%u, mem_req=%u, " "id_array_job=%u, id_array_task=%u, " "time_eligible=%ld where job_db_inx=%d", start_time, jname, job_state, node_cnt, job_ptr->qos_id, - job_ptr->assoc_id, wckeyid, + job_ptr->assoc_id, job_ptr->resv_id, 
job_ptr->time_limit, job_ptr->details->pn_min_memory, job_ptr->array_job_id, diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index aeebd3d810bb597e527192e972ff4338ab23ec9b..b72b5062d5353ed5b2ef5b4e6e3d1ab0b72aae8d 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -866,6 +866,7 @@ char *process_options_first_pass(int argc, char **argv) break; case LONG_OPT_WRAP: opt.wrap = xstrdup(optarg); + opt.job_name = xstrdup("wrap"); break; default: /* will be parsed in second pass function */ diff --git a/src/srun/srun.c b/src/srun/srun.c index 2a2cc984394f02fb779703a0d75abbd64f539c6e..cad6fd81585a1c15d2ef358a1dc337ac06a50d0a 100644 --- a/src/srun/srun.c +++ b/src/srun/srun.c @@ -216,6 +216,8 @@ int srun(int ac, char **av) env->labelio = opt.labelio; env->comm_port = slurmctld_comm_addr.port; env->batch_flag = 0; + if (opt.job_name) + env->job_name = opt.job_name; if (job) { uint16_t *tasks = NULL; slurm_step_ctx_get(job->step_ctx, SLURM_STEP_CTX_TASKS,