Skip to content
Snippets Groups Projects
Commit 49b4d829 authored by Moe Jette's avatar Moe Jette
Browse files

major re-write of logic to read /proc.

Some variables were being read as the wrong data type
(e.g. rss as an int instead of long int) which caused
problems on some systems.
parent 504428ed
No related branches found
No related tags found
No related merge requests found
...@@ -101,7 +101,7 @@ static pthread_mutex_t reading_mutex = PTHREAD_MUTEX_INITIALIZER; ...@@ -101,7 +101,7 @@ static pthread_mutex_t reading_mutex = PTHREAD_MUTEX_INITIALIZER;
static void _acct_kill_job(void); static void _acct_kill_job(void);
static void _get_offspring_data(List prec_list, prec_t *ancestor, pid_t pid); static void _get_offspring_data(List prec_list, prec_t *ancestor, pid_t pid);
static void _get_process_data(); static void _get_process_data();
static int _get_process_data_line(FILE *in, prec_t *prec); static int _get_process_data_line(int in, prec_t *prec);
static void *_watch_tasks(void *arg); static void *_watch_tasks(void *arg);
static void _destroy_prec(void *object); static void _destroy_prec(void *object);
...@@ -221,7 +221,7 @@ static void _get_process_data() { ...@@ -221,7 +221,7 @@ static void _get_process_data() {
fcntl(fd, F_SETFD, FD_CLOEXEC); fcntl(fd, F_SETFD, FD_CLOEXEC);
prec = xmalloc(sizeof(prec_t)); prec = xmalloc(sizeof(prec_t));
if (_get_process_data_line(stat_fp, prec)) if (_get_process_data_line(fd, prec))
list_append(prec_list, prec); list_append(prec_list, prec);
else else
xfree(prec); xfree(prec);
...@@ -287,7 +287,7 @@ static void _get_process_data() { ...@@ -287,7 +287,7 @@ static void _get_process_data() {
fcntl(fd, F_SETFD, FD_CLOEXEC); fcntl(fd, F_SETFD, FD_CLOEXEC);
prec = xmalloc(sizeof(prec_t)); prec = xmalloc(sizeof(prec_t));
if (_get_process_data_line(stat_fp, prec)) if (_get_process_data_line(fd, prec))
list_append(prec_list, prec); list_append(prec_list, prec);
else else
xfree(prec); xfree(prec);
...@@ -380,58 +380,58 @@ static void _acct_kill_job(void) ...@@ -380,58 +380,58 @@ static void _acct_kill_job(void)
/* _get_process_data_line() - get line of data from /proc/<pid>/stat /* _get_process_data_line() - get line of data from /proc/<pid>/stat
* *
* IN: in - input file channel * IN: in - input file descriptor
* OUT: prec - the destination for the data * OUT: prec - the destination for the data
* *
* RETVAL: ==0 - no valid data * RETVAL: ==0 - no valid data
* !=0 - data are valid * !=0 - data are valid
* *
* Note: It seems a bit wasteful to do all those atoi() and * Based upon stat2proc() from the ps command. It can handle arbitrary executable
* atol() conversions that are implicit in the scanf(), * file basenames for `cmd', i.e. those with embedded whitespace or embedded ')'s.
* but they help to ensure that we really are looking at the * Such names confuse %s (see scanf(3)), so the string is split and %39c is used
* expected type of record. * instead. (Except for embedded ')', "(%[^)]c)" would work.)
*/ */
static int _get_process_data_line(FILE *in, prec_t *prec) { static int _get_process_data_line(int in, prec_t *prec) {
/* discardable data */ char sbuf[256], *tmp;
int d; int num_read, nvals;
char c; char cmd[40], state[1];
char *s; int ppid, pgrp, session, tty_nr, tpgid;
uint32_t tmpu32; long unsigned flags, minflt, cminflt, majflt, cmajflt;
int max_path_len = pathconf("/", _PC_NAME_MAX); long unsigned utime, stime, starttime, vsize;
long int cutime, cstime, priority, nice, timeout, itrealvalue, rss;
/* useful datum */
int nvals; num_read = read(in, sbuf, (sizeof(sbuf) - 1));
if (num_read <= 0)
s = xmalloc(max_path_len + 1); return 0;
nvals=fscanf(in, sbuf[num_read] = '\0';
"%d %s %c %d %d "
"%d %d %d %d %d " tmp = strrchr(sbuf, ')'); /* split into "PID (cmd" and "<rest>" */
"%d %d %d %d %d " *tmp = '\0'; /* replace trailing ')' with NUL */
"%d %d %d %d %d " /* parse these two strings separately, skipping the leading "(". */
"%d %d %d %d %d", nvals = sscanf(sbuf, "%d (%39c", &prec->pid, cmd);
&prec->pid, s, &c, &prec->ppid, &d, if (nvals < 2)
&d, &d, &d, &tmpu32, &tmpu32, return 0;
&tmpu32, &prec->pages, &tmpu32, &prec->usec, &prec->ssec,
&tmpu32, &tmpu32, &tmpu32, &tmpu32, &tmpu32, nvals = sscanf(tmp + 2, /* skip space after ')' too */
&tmpu32, &tmpu32, &prec->vsize, &prec->rss, &tmpu32); "%c %d %d %d %d %d "
/* The fields in the record are "%lu %lu %lu %lu %lu "
* pid, command, state, ppid, pgrp, "%lu %lu %ld %ld %ld %ld "
* session, tty_nr, tpgid, flags, minflt, "%ld %ld %lu %lu %ld",
* cminflt, majflt, cmajflt, utime, stime, state, &ppid, &pgrp, &session, &tty_nr, &tpgid,
* cutime, cstime, priority, nice, lit_0, &flags, &minflt, &cminflt, &majflt, &cmajflt,
* itrealvalue, starttime, vsize, rss, rlim &utime, &stime, &cutime, &cstime, &priority, &nice,
*/ &timeout, &itrealvalue, &starttime, &vsize, &rss);
xfree(s); /* There are some additional fields, which we do not scan or use */
if ((nvals < 22) || (rss < 0))
prec->rss += 3; /* adjust for 3 page decrement return 0;
* performed for administrative
* purposes, see "man 5 proc" */ /* Copy the values that slurm records into our data structure */
if ((nvals != 25) || (prec->rss < 0) || (prec->vsize < 0)) prec->ppid = ppid;
return 0; /* Invalid data read */ prec->pages = majflt;
prec->usec = utime;
prec->rss *= getpagesize(); /* convert rss from pages to bytes */ prec->ssec = stime;
prec->rss /= 1024; /* convert rss from bytes to KB */ prec->vsize = vsize / 1024; /* convert from bytes to KB */
prec->vsize /= 1024; /* and convert vsize from bytes to KB */ prec->rss = rss * getpagesize() / 1024; /* convert from pages to KB */
return 1; return 1;
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment