tud-zih-energy / Slurm / Commits / 5d64195f

Commit 5d64195f, authored 10 years ago by jette

Merge branch 'slurm-14.11'
Parents: 1d75225b, d0dd1c53
Showing 3 changed files with 20 additions and 17 deletions:

  NEWS                                          +1   -0
  src/plugins/job_submit/pbs/job_submit_pbs.c  +13  -15
  src/slurmctld/job_submit.c                    +6   -2
NEWS  +1 −0

@@ -106,6 +106,7 @@ documents those changes that are of interest to users and administrators.
  -- Fix for slurmctld abort with GRES types configured and no CPU binding.
  -- Fix for GRES scheduling where count > 1 per topology type (or GRES types).
  -- Make CR_ONE_TASK_PER_CORE work correctly with task/affinity.
+ -- job_submit/pbs - Fix possible deadlock.
 * Changes in Slurm 14.11.3
 ==========================
...
src/plugins/job_submit/pbs/job_submit_pbs.c  +13 −15

@@ -97,6 +97,8 @@ const char plugin_type[] = "job_submit/pbs";
 const uint32_t plugin_version   = 100;
 const uint32_t min_plug_version = 100;
+
+static pthread_mutex_t depend_mutex = PTHREAD_MUTEX_INITIALIZER;

 int init(void)
 {
 	return SLURM_SUCCESS;
...

@@ -202,6 +204,16 @@ static void _xlate_before(char *depend, uint32_t submit_uid, uint32_t my_job_id)
 		return;
 	}
+	/* NOTE: We are updating a job record here in order to implement
+	 * the depend=before option. We are doing so without the write lock
+	 * on the job record, but using a local mutex to prevent multiple
+	 * updates on the same job when multiple jobs satisfying the dependency
+	 * are being processed at the same time (all with read locks). The
+	 * job read lock will prevent anyone else from getting a job write
+	 * lock and using a job write lock causes serious performance problems
+	 * for slow job_submit plugins. Not an ideal solution, but the best
+	 * option that we see. */
+	slurm_mutex_lock(&depend_mutex);
 	tok = strtok_r(NULL, ":", &last_ptr);
 	while (tok) {
 		job_id = atoi(tok);
...

@@ -239,6 +251,7 @@ static void _xlate_before(char *depend, uint32_t submit_uid, uint32_t my_job_id)
 		}
 		tok = strtok_r(NULL, ":", &last_ptr);
 	}
+	slurm_mutex_unlock(&depend_mutex);
 }

 /* Translate PBS job dependencies to Slurm equivalents to the exptned possible
...

@@ -300,21 +313,9 @@ static void _xlate_dependency(struct job_descriptor *job_desc,
 extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid)
 {
-	/* Locks: Read config, read job, read node, read partition */
-	slurmctld_lock_t job_read_lock = {
-		READ_LOCK, READ_LOCK, READ_LOCK, READ_LOCK };
-	/* Locks: Read config, write job, read node, read partition */
-	slurmctld_lock_t job_write_lock = {
-		READ_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK };
 	char *std_out, *tok;
 	uint32_t my_job_id;

-	/* This plugin needs to write other job records, so we need to revert
-	 * the locks set when this was called and set a job write lock.
-	 * DO NOT NEST TWO LOCKS. UNLOCK OLD LOCK AND SET NEW LOCK AS NEEDED */
-	unlock_slurmctld(job_read_lock);
-	lock_slurmctld(job_write_lock);
 	my_job_id = get_next_job_id();
 	_xlate_dependency(job_desc, submit_uid, my_job_id);
...

@@ -360,9 +361,6 @@ extern int job_submit(struct job_descriptor *job_desc, uint32_t submit_uid)
 		xstrcat(job_desc->comment, std_out);
 	}
-	unlock_slurmctld(job_write_lock);
-	lock_slurmctld(job_read_lock);
-
 	return SLURM_SUCCESS;
 }
...
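The NOTE block added in _xlate_before() is the substance of the fix: the plugin now updates other jobs' records while still holding the read lock it was called with, and serializes those updates through the new plugin-local depend_mutex, instead of trading its read lock for a write lock. Below is a minimal standalone sketch of that pattern (illustrative only, not Slurm code: the rwlock standing in for the slurmctld job lock, record_t, job_table and submit_thread are all invented for the example).

/* sketch.c -- illustrative only, not Slurm code; build with: cc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

typedef struct {
	int job_id;
	int depend_cnt;	/* field updated while holding only a read lock */
} record_t;

static pthread_rwlock_t table_lock = PTHREAD_RWLOCK_INITIALIZER; /* stands in for the slurmctld job lock */
static pthread_mutex_t depend_mutex = PTHREAD_MUTEX_INITIALIZER; /* plugin-local, as in the patch */
static record_t job_table[4] = { {1, 0}, {2, 0}, {3, 0}, {4, 0} };

static void *submit_thread(void *arg)
{
	int target = *(int *) arg;

	/* Entry locks: many submit threads share the read lock cheaply. */
	pthread_rwlock_rdlock(&table_lock);

	/* The read lock already excludes writers, so the only remaining
	 * race is with other read-locked threads doing this same update.
	 * A local mutex serializes just that, and never has to wait for
	 * the whole (possibly slow) plugin stack the way a write-lock
	 * upgrade would. */
	pthread_mutex_lock(&depend_mutex);
	job_table[target].depend_cnt++;
	pthread_mutex_unlock(&depend_mutex);

	pthread_rwlock_unlock(&table_lock);
	return NULL;
}

int main(void)
{
	pthread_t tid[8];
	int target[8];
	int i;

	for (i = 0; i < 8; i++) {
		target[i] = i % 4;
		pthread_create(&tid[i], NULL, submit_thread, &target[i]);
	}
	for (i = 0; i < 8; i++)
		pthread_join(tid[i], NULL);
	for (i = 0; i < 4; i++)	/* each record was hit by exactly 2 threads */
		printf("job %d: depend_cnt=%d\n",
		       job_table[i].job_id, job_table[i].depend_cnt);
	return 0;
}

The trade-off the NOTE concedes is visible in the sketch: the read lock only keeps writers out, so the in-place update is safe only because every updater agrees to take depend_mutex first.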
src/slurmctld/job_submit.c  +6 −2

@@ -241,7 +241,11 @@ extern int job_submit_plugin_submit(struct job_descriptor *job_desc,
 	START_TIMER;
 	rc = job_submit_plugin_init();
 	slurm_mutex_lock(&g_context_lock);
+	/* NOTE: On function entry read locks are set on config, job, node and
+	 * partition structures. Do not attempt to unlock them and then
+	 * lock again (say with a write lock) since doing so will trigger
+	 * a deadlock with the g_context_lock above. */
 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
 		rc = (*(ops[i].submit))(job_desc, submit_uid, err_msg);
 	slurm_mutex_unlock(&g_context_lock);
 	END_TIMER2("job_submit_plugin_submit");
...

@@ -264,7 +268,7 @@ extern int job_submit_plugin_modify(struct job_descriptor *job_desc,
 	START_TIMER;
 	rc = job_submit_plugin_init();
 	slurm_mutex_lock(&g_context_lock);
 	for (i = 0; ((i < g_context_cnt) && (rc == SLURM_SUCCESS)); i++)
 		rc = (*(ops[i].modify))(job_desc, job_ptr, submit_uid);
 	slurm_mutex_unlock(&g_context_lock);
 	END_TIMER2("job_submit_plugin_modify");
...
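The NOTE added to job_submit_plugin_submit() spells out the cycle the old job_submit/pbs code could trigger: a plugin that drops its entry read locks and requests a write lock does so while g_context_lock is held, while a second submit thread holds its read locks and waits on g_context_lock; the write lock can never be granted while that reader exists. The following is a hedged sketch of that cycle (illustrative only, not Slurm code: both thread bodies and all timings are invented, and it uses POSIX timed locks, available on Linux, so the program reports the deadlock after a couple of seconds instead of hanging).

/* deadlock_sketch.c -- illustrative only; build on Linux with: cc -pthread deadlock_sketch.c */
#include <errno.h>
#include <pthread.h>
#include <stdio.h>
#include <time.h>
#include <unistd.h>

static pthread_rwlock_t job_lock = PTHREAD_RWLOCK_INITIALIZER;	/* stands in for the slurmctld job lock */
static pthread_mutex_t g_context_lock = PTHREAD_MUTEX_INITIALIZER;

static struct timespec deadline(int secs)
{
	struct timespec ts;

	clock_gettime(CLOCK_REALTIME, &ts);
	ts.tv_sec += secs;
	return ts;
}

/* Thread A plays the old job_submit/pbs: it unlocks the entry read lock
 * and asks for a write lock while g_context_lock is already held. */
static void *old_pbs_plugin(void *arg)
{
	struct timespec ts;

	pthread_rwlock_rdlock(&job_lock);	/* entry read lock */
	pthread_mutex_lock(&g_context_lock);	/* framework's plugin mutex */
	usleep(200 * 1000);			/* let thread B arrive */
	pthread_rwlock_unlock(&job_lock);	/* "unlock old lock ..." */
	ts = deadline(2);
	if (pthread_rwlock_timedwrlock(&job_lock, &ts) == ETIMEDOUT)
		puts("A: write lock blocked by B's read lock");
	else
		pthread_rwlock_unlock(&job_lock);
	pthread_mutex_unlock(&g_context_lock);
	return NULL;
}

/* Thread B plays a second submit: it keeps its entry read lock, as the
 * new NOTE requires, and waits for g_context_lock. */
static void *second_submit(void *arg)
{
	struct timespec ts;

	pthread_rwlock_rdlock(&job_lock);	/* entry read lock, kept */
	ts = deadline(1);
	if (pthread_mutex_timedlock(&g_context_lock, &ts) == ETIMEDOUT)
		puts("B: g_context_lock still held by A");
	else
		pthread_mutex_unlock(&g_context_lock);
	usleep(1500 * 1000);	/* hold the read lock long enough for A's
				 * write-lock timeout to fire as well */
	pthread_rwlock_unlock(&job_lock);
	return NULL;
}

int main(void)
{
	pthread_t a, b;

	pthread_create(&a, NULL, old_pbs_plugin, NULL);
	usleep(100 * 1000);	/* A takes both locks first */
	pthread_create(&b, NULL, second_submit, NULL);
	pthread_join(a, NULL);
	pthread_join(b, NULL);
	return 0;
}

The two halves of the commit break this cycle from both sides: job_submit.c now forbids re-locking inside a plugin call, and job_submit_pbs.c no longer needs to, thanks to depend_mutex.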