Skip to content
Snippets Groups Projects
Commit 2d5093b6 authored by Danny Auble's avatar Danny Auble
Browse files
parent 4d83dfa4
No related branches found
No related tags found
No related merge requests found
...@@ -49,12 +49,12 @@ gather and report incomplete information for these calls; ...@@ -49,12 +49,12 @@ gather and report incomplete information for these calls;
\f2getrusage(3)\fP man page for information about which data are \f2getrusage(3)\fP man page for information about which data are
actually available on your system. actually available on your system.
.IP .IP
If --dump is specified, the field selection options (--brief, If \-\-dump is specified, the field selection options (\-\-brief,
--fields, ...) have no effect. \-\-format, ...) have no effect.
.IP .IP
Elapsed time fields are presented as two fields: integral seconds and integral microseconds. Elapsed time fields are presented as two fields: integral seconds and integral microseconds.
.IP .IP
If --dump is not specified, elapsed time fields are presented as If \-\-dump is not specified, elapsed time fields are presented as
[[days-]hours:]minutes:seconds.hundredths. [[days-]hours:]minutes:seconds.hundredths.
.IP .IP
The default input file is the file named in the jobacct_logfile The default input file is the file named in the jobacct_logfile
...@@ -118,21 +118,15 @@ data output when this option is used. ...@@ -118,21 +118,15 @@ data output when this option is used.
.TP .TP
\f3\-\-duplicates\fP \f3\-\-duplicates\fP
If SLURM job ids are reset, but the job accounting log file isn't If SLURM job ids are reset, but the job accounting log file isn't
reset at the same time (with -e, for example), some job numbers will reset at the same time (with \-e, for example), some job numbers will
probably appear more than once in the accounting log file to refer to probably appear more than once in the accounting log file to refer to
different jobs; such jobs can be distinguished by the "submit" time different jobs; such jobs can be distinguished by the "submit" time
stamp in the data records. stamp in the data records.
.IP .IP
When data for specific jobs are requested with the --jobs option, we When data for specific jobs are requested with the \-\-jobs option, we
assume that the user wants to see only the most recent job with that assume that the user wants to see only the most recent job with that
number. This behavior can be overridden by specifying --duplicates, in number. This behavior can be overridden by specifying \-\-duplicates, in
which case all records that match the selection criteria will be returned. which case all records that match the selection criteria will be returned.
.IP
When --jobs is not specified, we report data for all jobs that match
the selection criteria, even if some of the job numbers are
reused. Specify that you only want the most recent job for each
selected job number with the --noduplicates option.
.TP .TP
\f3\-e \fP\f3,\fP \f3\-\-helpformat\fP \f3\-e \fP\f3,\fP \f3\-\-helpformat\fP
......
...@@ -990,6 +990,7 @@ extern int down_nodecard(char *bp_name, bitoff_t io_start) ...@@ -990,6 +990,7 @@ extern int down_nodecard(char *bp_name, bitoff_t io_start)
/* Translate 1 nodecard count to ionode count */ /* Translate 1 nodecard count to ionode count */
if((io_cnt *= bluegene_io_ratio)) if((io_cnt *= bluegene_io_ratio))
io_cnt--; io_cnt--;
/* make sure we create something that is able to be /* make sure we create something that is able to be
created */ created */
if(bluegene_smallest_block < bluegene_nodecard_node_cnt) if(bluegene_smallest_block < bluegene_nodecard_node_cnt)
...@@ -1004,6 +1005,16 @@ extern int down_nodecard(char *bp_name, bitoff_t io_start) ...@@ -1004,6 +1005,16 @@ extern int down_nodecard(char *bp_name, bitoff_t io_start)
bp_name); bp_name);
return EINVAL; return EINVAL;
} }
/* this is here for sanity check to make sure we don't core on
these bits when we set them below. */
if(io_start >= bluegene_numpsets
|| (io_start+io_cnt) >= bluegene_numpsets) {
debug("io %d-%d not configured on this "
"system, only %d ionodes per midplane",
io_start, io_start+io_cnt, bluegene_numpsets);
return EINVAL;
}
bp_bit = (node_ptr - node_record_table_ptr); bp_bit = (node_ptr - node_record_table_ptr);
memset(&blockreq, 0, sizeof(blockreq_t)); memset(&blockreq, 0, sizeof(blockreq_t));
......
...@@ -183,7 +183,16 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr) ...@@ -183,7 +183,16 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr)
rc = SLURM_ERROR; rc = SLURM_ERROR;
goto clean_up; goto clean_up;
} }
/* make sure we have this midplane in the system */
if(coord[X] >= DIM_SIZE[X]
|| coord[Y] >= DIM_SIZE[Y]
|| coord[Z] >= DIM_SIZE[Z]) {
debug4("node %s isn't configured", bp_id);
rc = SLURM_SUCCESS;
goto clean_up;
}
node_name = xstrdup_printf("%s%c%c%c", node_name = xstrdup_printf("%s%c%c%c",
bg_slurm_node_prefix, bg_slurm_node_prefix,
alpha_num[coord[X]], alpha_num[coord[X]],
...@@ -263,11 +272,28 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr) ...@@ -263,11 +272,28 @@ static int _test_down_nodecards(rm_BP_t *bp_ptr)
io_start = atoi((char*)nc_name+1); io_start = atoi((char*)nc_name+1);
io_start *= bluegene_io_ratio; io_start *= bluegene_io_ratio;
#endif #endif
/* On small systems with less than a midplane the
database may see the nodecards there but in missing
state. To avoid getting a bunch of warnings here just
skip over the ones missing.
*/
if(io_start >= bluegene_numpsets) {
if(state == RM_NODECARD_MISSING) {
debug3("Nodecard %s is missing continue",
nc_name);
} else {
error("We don't have the system configured "
"for this nodecard %s, we only have "
"%d ionodes and this starts at %d",
nc_name, io_start, bluegene_numpsets);
}
free(nc_name);
continue;
}
/* if(!ionode_bitmap) */ /* if(!ionode_bitmap) */
/* ionode_bitmap = bit_alloc(bluegene_numpsets); */ /* ionode_bitmap = bit_alloc(bluegene_numpsets); */
/* info("setting %d-%d of %d", */ /* info("setting %s start %d of %d", */
/* io_start, io_start+io_cnt, bluegene_numpsets); */ /* nc_name, io_start, bluegene_numpsets); */
/* bit_nset(ionode_bitmap, io_start, io_start+io_cnt); */ /* bit_nset(ionode_bitmap, io_start, io_start+io_cnt); */
/* we have to handle each nodecard separately to make /* we have to handle each nodecard separately to make
sure we don't create holes in the system */ sure we don't create holes in the system */
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment