Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
S
Slurm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Service Desk
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
tud-zih-energy
Slurm
Commits
a600e2cf
Commit
a600e2cf
authored
19 years ago
by
Moe Jette
Browse files
Options
Downloads
Patches
Plain Diff
svn merge -r6526:6609
https://eris.llnl.gov/svn/slurm/branches/slurm-0-6-branch
parent
7d76fdaa
No related branches found
No related tags found
No related merge requests found
Changes
3
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
NEWS
+1
-1
1 addition, 1 deletion
NEWS
doc/html/quickstart_admin.html
+2
-1
2 additions, 1 deletion
doc/html/quickstart_admin.html
src/plugins/jobacct/log/jobacct_log.c
+31
-19
31 additions, 19 deletions
src/plugins/jobacct/log/jobacct_log.c
with
34 additions
and
21 deletions
NEWS
+
1
−
1
View file @
a600e2cf
...
@@ -57,7 +57,7 @@ documents those changes that are of interest to users and admins.
...
@@ -57,7 +57,7 @@ documents those changes that are of interest to users and admins.
* Changes in SLURM 0.6.9
* Changes in SLURM 0.6.9
========================
========================
-- Fix bug in mpi plugin to set the ID correctly
-- Fix bug in mpi plugin to set the ID correctly
-- Accounting bug causing segv fixed (Andy Riebs, 14oct.jobacct.patch)
* Changes in SLURM 0.6.8
* Changes in SLURM 0.6.8
========================
========================
...
...
This diff is collapsed.
Click to expand it.
doc/html/quickstart_admin.html
+
2
−
1
View file @
a600e2cf
...
@@ -102,7 +102,8 @@ are denoted below.
...
@@ -102,7 +102,8 @@ are denoted below.
package.) The switch/elan plugin also requires the
package.) The switch/elan plugin also requires the
presence of the libelanhosts library and /etc/elanhosts
presence of the libelanhosts library and /etc/elanhosts
configuration file. (See elanhosts(5) man page in that
configuration file. (See elanhosts(5) man page in that
package for more details)
package for more details). Finally, the "ptrack" kernel
patch is required for process tracking.
</ul>
</ul>
Please see the
<a
href=
download.html
>
Download
</a>
page for references to
Please see the
<a
href=
download.html
>
Download
</a>
page for references to
required software to build these plugins.
</p>
required software to build these plugins.
</p>
...
...
This diff is collapsed.
Click to expand it.
src/plugins/jobacct/log/jobacct_log.c
+
31
−
19
View file @
a600e2cf
...
@@ -725,24 +725,34 @@ int slurmd_jobacct_smgr(void)
...
@@ -725,24 +725,34 @@ int slurmd_jobacct_smgr(void)
int
slurmd_jobacct_task_exit
(
slurmd_job_t
*
job
,
pid_t
pid
,
int
status
,
struct
rusage
*
rusage
)
int
slurmd_jobacct_task_exit
(
slurmd_job_t
*
job
,
pid_t
pid
,
int
status
,
struct
rusage
*
rusage
)
{
{
_jrec_t
*
jrec
;
_jrec_t
*
jrec
;
int
rc
=
SLURM_SUCCESS
;
int
rc
=
SLURM_SUCCESS
;
static
int
active
=
0
;
if
(
active
==
0
)
active
=
job
->
ntasks
-
1
;
else
active
--
;
debug2
(
"slurmd_jobacct_task_exit for job %u.%u,"
debug2
(
"slurmd_jobacct_task_exit(%d) for job %u.%u,"
" node %d, status=%d"
,
" node %d, status=%d, nprocs %d, active %d"
,
job
->
jobid
,
job
->
stepid
,
job
->
nodeid
,
status
/
256
);
getpid
(),
job
->
jobid
,
job
->
stepid
,
job
->
nodeid
,
status
/
256
,
job
->
nprocs
,
active
);
jrec
=
_alloc_jrec
(
job
);
jrec
=
_alloc_jrec
(
job
);
jrec
->
nodeid
=
job
->
nodeid
;
jrec
->
nodeid
=
job
->
nodeid
;
memcpy
(
&
jrec
->
rusage
,
rusage
,
sizeof
(
struct
rusage
));
memcpy
(
&
jrec
->
rusage
,
rusage
,
sizeof
(
struct
rusage
));
jrec
->
status
=
status
/
256
;
jrec
->
status
=
status
/
256
;
if
(
prec_frequency
)
{
/* if dynamic monitoring */
if
(
prec_frequency
)
/* if dynamic monitoring */
slurm_mutex_lock
(
&
precTable_lock
);
/* let watcher finish loop */
if
(
active
==
0
)
{
pthread_cancel
(
_watch_tasks_thread_id
);
debug3
(
"slurmd_jobacct_task_exit(%d) cancelling "
pthread_join
(
_watch_tasks_thread_id
,
NULL
);
"_watch_tasks"
,
slurm_mutex_unlock
(
&
precTable_lock
);
getpid
(),
job
->
jobid
,
job
->
stepid
);
jrec
->
max_psize
=
max_psize
;
pthread_cancel
(
_watch_tasks_thread_id
);
jrec
->
max_vsize
=
max_vsize
;
pthread_join
(
_watch_tasks_thread_id
,
NULL
);
}
}
jrec
->
max_psize
=
max_psize
;
jrec
->
max_vsize
=
max_vsize
;
rc
=
_send_data_to_mynode
(
TASKDATA
,
jrec
);
rc
=
_send_data_to_mynode
(
TASKDATA
,
jrec
);
xfree
(
jrec
);
xfree
(
jrec
);
return
rc
;
return
rc
;
...
@@ -856,6 +866,7 @@ static _jrec_t *_get_jrec_by_jobstep(List jrecs, uint32_t jobid,
...
@@ -856,6 +866,7 @@ static _jrec_t *_get_jrec_by_jobstep(List jrecs, uint32_t jobid,
uint32_t
stepid
)
{
uint32_t
stepid
)
{
_jrec_t
*
jrec
=
NULL
;
_jrec_t
*
jrec
=
NULL
;
ListIterator
i
;
ListIterator
i
;
if
(
jrecs
==
NULL
)
{
if
(
jrecs
==
NULL
)
{
error
(
"no accounting job list"
);
error
(
"no accounting job list"
);
return
jrec
;
return
jrec
;
...
@@ -1360,11 +1371,6 @@ static int _send_data_to_node_0(_jrec_t *jrec) {
...
@@ -1360,11 +1371,6 @@ static int _send_data_to_node_0(_jrec_t *jrec) {
int
rc
=
SLURM_SUCCESS
,
int
rc
=
SLURM_SUCCESS
,
retry
;
retry
;
if
(
!
strcmp
(
jrec
->
node0
,
NOT_FOUND
))
{
error
(
"jobacct(%d): job %d has no node0"
);
return
SLURM_SUCCESS
;
/* can't do anything here */
}
debug2
(
"jobacct(%d): in _send_data_to_node_0(job %u), nodes0,1=%s,%s"
debug2
(
"jobacct(%d): in _send_data_to_node_0(job %u), nodes0,1=%s,%s"
", utime=%d.%06d"
,
", utime=%d.%06d"
,
getpid
(),
jrec
->
jobid
,
jrec
->
node0
,
jrec
->
node1
,
getpid
(),
jrec
->
jobid
,
jrec
->
node0
,
jrec
->
node1
,
...
@@ -1376,6 +1382,12 @@ static int _send_data_to_node_0(_jrec_t *jrec) {
...
@@ -1376,6 +1382,12 @@ static int _send_data_to_node_0(_jrec_t *jrec) {
return
rc
;
return
rc
;
}
}
if
(
strcmp
(
jrec
->
node0
,
NOT_FOUND
)
==
0
)
{
error
(
"jobacct(%d): job %d has no node0"
,
getpid
(),
jrec
->
jobid
);
return
SLURM_SUCCESS
;
/* can't do anything here */
}
/* make a stats_msg */
/* make a stats_msg */
stats
.
msg_type
=
htonl
(
TO_NODE0
);
stats
.
msg_type
=
htonl
(
TO_NODE0
);
stats
.
jobid
=
htonl
(
jrec
->
jobid
);
stats
.
jobid
=
htonl
(
jrec
->
jobid
);
...
@@ -1602,8 +1614,8 @@ static void *_watch_tasks(void *arg) {
...
@@ -1602,8 +1614,8 @@ static void *_watch_tasks(void *arg) {
while
(
1
)
{
/* Do this until slurm_jobacct_task_exit() stops us */
while
(
1
)
{
/* Do this until slurm_jobacct_task_exit() stops us */
sleep
(
prec_frequency
);
sleep
(
prec_frequency
);
pthread_testcancel
();
pthread_testcancel
();
slurm_mutex_lock
(
&
precTable_lock
);
pthread_setcancelstate
(
PTHREAD_CANCEL_DISABLE
,
&
tmp
);
pthread_setcancelstate
(
PTHREAD_CANCEL_DISABLE
,
&
tmp
);
slurm_mutex_lock
(
&
precTable_lock
);
_get_process_data
();
/* Update the data */
_get_process_data
();
/* Update the data */
slurm_mutex_unlock
(
&
precTable_lock
);
slurm_mutex_unlock
(
&
precTable_lock
);
pthread_setcancelstate
(
PTHREAD_CANCEL_ENABLE
,
&
tmp
);
pthread_setcancelstate
(
PTHREAD_CANCEL_ENABLE
,
&
tmp
);
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment