diff --git a/NEWS b/NEWS index abe3325aa75e571d84f4dbb3b49da40871c950d0..844aa1398c225f84e5fdc0065dae9c474f9ae369 100644 --- a/NEWS +++ b/NEWS @@ -42,6 +42,8 @@ documents those changes that are of interest to users and admins. * Changes in Slurm 14.03.3 ========================== + -- Correction to default batch output file name. Version 14.03.2 was using + "slurm-<jobid>_4294967294.out" due to error in job array logic. * Changes in Slurm 14.03.2 ========================== diff --git a/auxdir/x_ac_lua.m4 b/auxdir/x_ac_lua.m4 index d5a7a2b7fc0cb20d8dd5c99421d1180bba02334d..61648b5939652b8bcd53fe55a84c52c2c7a91529 100644 --- a/auxdir/x_ac_lua.m4 +++ b/auxdir/x_ac_lua.m4 @@ -31,7 +31,7 @@ AC_DEFUN([X_AC_LUA], #include <lauxlib.h> #include <lualib.h> ], - [lua_State *L = luaL_newstate (); + [lua_State *L = luaL_newstate (); luaL_openlibs(L); ], [], [x_ac_have_lua="no"]) diff --git a/configure b/configure index 750aab664ffb3a8abee47a26f34d7b53ed77ef17..3f851a090b2b96bf44e55eb4284a33fe0f076aad 100755 --- a/configure +++ b/configure @@ -22846,7 +22846,7 @@ $as_echo_n "checking for whether we can link to liblua... 
" >&6; } int main () { -lua_State *L = luaL_newstate (); +lua_State *L = luaL_newstate (); luaL_openlibs(L); ; return 0; diff --git a/contribs/lua/job_submit.license.lua b/contribs/lua/job_submit.license.lua index 991c31b81af5b73ba7b9cef0a6634153f82383b1..c0d6c31b4629d0ea71bb4e3ae56492319f42d4b2 100644 --- a/contribs/lua/job_submit.license.lua +++ b/contribs/lua/job_submit.license.lua @@ -41,8 +41,7 @@ function slurm_job_submit ( job_desc, part_list, submit_uid ) if bad_license_count > 0 then log_info("slurm_job_submit: for user %d, invalid licenses value: %s", job_desc.user_id, job_desc.licenses) --- ESLURM_INVALID_LICENSES is 2048 - return 2048 + return slurm.ESLURM_INVALID_LICENSES end return 0 @@ -60,8 +59,7 @@ function slurm_job_modify ( job_desc, job_rec, part_list, modify_uid ) if bad_license_count > 0 then log_info("slurm_job_modify: for job %u, invalid licenses value: %s", job_rec.job_id, job_desc.licenses) --- ESLURM_INVALID_LICENSES is 2048 - return 2048 + return slurm.ESLURM_INVALID_LICENSES end return 0 diff --git a/doc/html/gres.shtml b/doc/html/gres.shtml index 0fc84e68540f13ec2a54cf55f54579af710ae5e2..451a3e702ebeff25ebc3979315eb77f4bd4c3bb0 100644 --- a/doc/html/gres.shtml +++ b/doc/html/gres.shtml @@ -79,6 +79,11 @@ The name can include a numberic range suffix to be interpreted by SLURM This field is generally required if enforcement of generic resource allocations is to be supported (i.e. prevents a users from making use of resources allocated to a different user). +Enforcement of the file allocation relies upon Linux Control Groups (cgroups) +and Slurm's task/cgroup plugin, which will place the allocated files into +the job's cgroup and prevent use of other files. +Please see Slurm's <a href="cgroups.html">Cgroups Guide</a> for more +information.<br> If File is specified then Count must be either set to the number of file names specified or not set (the default value is the number of files specified). 
diff --git a/src/plugins/job_submit/lua/job_submit_lua.c b/src/plugins/job_submit/lua/job_submit_lua.c index 09f6344a9ee81bbceecdf29c1d7bc9b68ad1d2ec..1c7b1456c986fd18e7b634f8014bc232b1f17dd5 100644 --- a/src/plugins/job_submit/lua/job_submit_lua.c +++ b/src/plugins/job_submit/lua/job_submit_lua.c @@ -245,7 +245,7 @@ static void _register_lua_slurm_output_functions (void) lua_setfield (L, -2, "log_user"); /* - * slurm.SUCCESS, slurm.FAILURE and slurm.ERROR + * Error codes: slurm.SUCCESS, slurm.FAILURE, slurm.ERROR, etc. */ lua_pushnumber (L, SLURM_FAILURE); lua_setfield (L, -2, "FAILURE"); @@ -253,6 +253,8 @@ static void _register_lua_slurm_output_functions (void) lua_setfield (L, -2, "ERROR"); lua_pushnumber (L, SLURM_SUCCESS); lua_setfield (L, -2, "SUCCESS"); + lua_pushnumber (L, ESLURM_INVALID_LICENSES); + lua_setfield (L, -2, "ESLURM_INVALID_LICENSES"); /* diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 00d955e7784d24e1993f561536d82ed8bdc4b441..bb7d8f9682f6d195e3a3320108ae1917c49159a4 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -4337,7 +4337,7 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, List part_ptr_list = NULL; bitstr_t *req_bitmap = NULL, *exc_bitmap = NULL; struct job_record *job_ptr = NULL; - slurmdb_association_rec_t assoc_rec, *assoc_ptr; + slurmdb_association_rec_t assoc_rec, *assoc_ptr = NULL; List license_list = NULL; bool valid; slurmdb_qos_rec_t qos_rec, *qos_ptr; @@ -11746,15 +11746,14 @@ extern void build_cg_bitmap(struct job_record *job_ptr) /* job_hold_requeue() * - * Requeue the job either in JOB_SPECIAL_EXIT state - * in which is put on hold or if JOB_REQUEUE_HOLD is - * specified don't change its state. The requeue - * can happen directly from job_requeue() or from - * job_epilog_complete() after the last component - * has finished. + * Requeue the job based upon its current state. + * If JOB_SPECIAL_EXIT then requeue and hold with JOB_SPECIAL_EXIT state. 
+ * If JOB_REQUEUE_HOLD then requeue and hold. + * If JOB_REQUEUE then requeue and let it run again. + * The requeue can happen directly from job_requeue() or from + * job_epilog_complete() after the last component has finished. */ -void -job_hold_requeue(struct job_record *job_ptr) +extern void job_hold_requeue(struct job_record *job_ptr) { uint32_t state; uint32_t flags; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 049807d95c57f4b2f4dd6dfb5d3b40754d71fb55..90dc074abfd0e7245398c7e9419d5b704350fc60 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -770,6 +770,13 @@ enum select_plugindata_info { extern void abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, char *node_name); +/* + * allocated_session_in_use - check if an interactive session is already running + * IN new_alloc - allocation (alloc_node:alloc_sid) to test for + * Returns true if an interactive session of the same node:sid already exists. + */ +extern bool allocated_session_in_use(job_desc_msg_t *new_alloc); + /* Complete a batch job requeue logic after all steps complete so that * subsequent jobs appear in a separate accounting record. */ void batch_requeue_fini(struct job_record *job_ptr); @@ -1006,6 +1013,13 @@ extern int init_node_conf (); */ extern int init_part_conf (void); +/* init_requeue_policy() + * + * Build the arrays holding the job exit code upon + * which jobs should get requeued. + */ +extern void init_requeue_policy(void); + /* * is_node_down - determine if the specified node's state is DOWN * IN name - name of the node @@ -1020,13 +1034,6 @@ extern bool is_node_down (char *name); */ extern bool is_node_resp (char *name); -/* - * allocated_session_in_use - check if an interactive session is already running - * IN new_alloc - allocation (alloc_node:alloc_sid) to test for - * Returns true if an interactive session of the same node:sid already exists. 
- */ -extern bool allocated_session_in_use(job_desc_msg_t *new_alloc); - /* * job_alloc_info - get details about an existing job allocation * IN uid - job issuing the code @@ -1117,6 +1124,18 @@ extern void job_fini (void); */ extern int job_fail(uint32_t job_id, uint16_t job_state); + +/* job_hold_requeue() + * + * Requeue the job based upon its current state. + * If JOB_SPECIAL_EXIT then requeue and hold with JOB_SPECIAL_EXIT state. + * If JOB_REQUEUE_HOLD then requeue and hold. + * If JOB_REQUEUE then requeue and let it run again. + * The requeue can happen directly from job_requeue() or from + * job_epilog_complete() after the last component has finished. + */ +extern void job_hold_requeue(struct job_record *job_ptr); + /* * determine if job is ready to execute per the node select plugin * IN job_id - job to test @@ -1875,7 +1894,6 @@ extern int sync_job_files(void); /* After recovering job state, if using priority/basic then we increment the * priorities of all jobs to avoid decrementing the base down to zero */ extern void sync_job_priorities(void); - /* * update_job - update a job's parameters per the supplied specifications * IN job_specs - a job's specification @@ -2029,19 +2047,4 @@ extern bool validate_super_user(uid_t uid); */ extern bool validate_operator(uid_t uid); -/* job_hold_requeue() - requeue a job in hold or requeue_exit - * state. - * - * IN - job record - */ -extern void job_hold_requeue(struct job_record *job_ptr); - -/* init_requeue_policy() - * - * Build the arrays holding the job exit code upon - * which jobs should get requeued. 
- */ -extern void init_requeue_policy(void); - - #endif /* !_HAVE_SLURMCTLD_H */ diff --git a/src/slurmd/slurmstepd/slurmstepd_job.c b/src/slurmd/slurmstepd/slurmstepd_job.c index 4201634790e28268b00320d3e203ef0910c92fef..cfc3e8bdad2ca77f08f6ab2aa9f38abdf84a8adc 100644 --- a/src/slurmd/slurmstepd/slurmstepd_job.c +++ b/src/slurmd/slurmstepd/slurmstepd_job.c @@ -148,7 +148,7 @@ static char * _batchfilename(stepd_step_rec_t *job, const char *name) { if (name == NULL) { - if (job->array_task_id == (uint16_t) NO_VAL) + if (job->array_task_id == NO_VAL) return fname_create(job, "slurm-%J.out", 0); else return fname_create(job, "slurm-%A_%a.out", 0);