diff --git a/NEWS b/NEWS index 7a00d4d0448c93f0b5dc4c41e872b0bb61063b56..a162a1c6c85b713747fd73b512f8a9af9c172e58 100644 --- a/NEWS +++ b/NEWS @@ -10,7 +10,7 @@ documents those changes that are of interest to users and admins. files are opened ("t" for truncate, "a" for append). -- Added checkpoint/xlch plugin for use with XLCH (Hongjia Cao, NUDT). -- Added srun option --checkpoint-path for use with XLCH (Hongjia Cao, NUDT). - -- Added new srun/sbatch option "--acctg-freq" for user control over + -- Added new srun/salloc/sbatch option "--acctg-freq" for user control over accounting data collection polling interval. * Changes in SLURM 1.3.0-pre7 diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 92a52fc3c487119778d35a6ed2ef4e53949af089..afb5d10147ca6cd70972eccb17a83df694ee3fb4 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -13,6 +13,14 @@ The command may be any program the user wishes. Some typical commands are xterm .SH "OPTIONS" .LP +.TP +\fB\-\-acctg\-freq\fR=\fIseconds\fR +Define the job accounting sampling interval. +This can be used to override the \fIJobAcctGatherFrequency\fR parameter in SLURM's +configuration file, \fIslurm.conf\fR. +A value of zero disables the periodic job sampling and provides accounting +information only on job termination (reducing SLURM interference with the job). + .TP \fB\-B\fR \fB\-\-extra\-node\-info\fR=\fIsockets\fR[:\fIcores\fR[:\fIthreads\fR]] Request a specific allocation of resources with details as to the @@ -514,6 +522,9 @@ variables settings. \fBSALLOC_ACCOUNT\fR Same as \fB\-\-account\fR. .TP +\fBSALLOC_ACCTG_FREQ\fR +Same as \fB\-\-acctg\-freq\fR. +.TP \fBSALLOC_BELL\fR Same as \fB\-\-bell\fR. .TP diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index f388661f7dfb4f5e1a94579d4e90747497fd5577..76f0cc0912692470b040239c4a995d41f0e94a57 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -612,6 +612,9 @@ environment variables. 
\fBSBATCH_ACCOUNT\fR Same as \fB\-\-account\fR. .TP +\fBSBATCH_ACCTG_FREQ\fR +Same as \fB\-\-acctg\-freq\fR. +.TP \fBSBATCH_CONN_TYPE\fR Same as \fB\-\-conn\-type\fR. .TP diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9f37053f6f8cf31413e02d93660a1af0f5fe417a..9d6cf0c52bc280109f917ac039ee6198bdbaf49a 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -87,6 +87,7 @@ #define OPT_JOBID 0x11 #define OPT_EXCLUSIVE 0x12 #define OPT_OVERCOMMIT 0x13 +#define OPT_ACCTG_FREQ 0x14 /* generic getopt_long flags, integers and *not* valid characters */ #define LONG_OPT_JOBID 0x105 @@ -122,6 +123,7 @@ #define LONG_OPT_NTASKSPERCORE 0x138 #define LONG_OPT_JOBMEM 0x13a #define LONG_OPT_HINT 0x13b +#define LONG_OPT_ACCTG_FREQ 0x13c /*---- global variables, defined in opt.h ----*/ opt_t opt; @@ -279,6 +281,7 @@ static void _opt_default() opt.egid = (gid_t) -1; opt.bell = BELL_AFTER_DELAY; + opt.acctg_freq = -1; } /*---[ env var processing ]-----------------------------------------------*/ @@ -314,6 +317,7 @@ env_vars_t env_vars[] = { {"SALLOC_NO_BELL", OPT_NO_BELL, NULL, NULL }, {"SALLOC_EXCLUSIVE", OPT_EXCLUSIVE, NULL, NULL }, {"SALLOC_OVERCOMMIT", OPT_OVERCOMMIT, NULL, NULL }, + {"SALLOC_ACCTG_FREQ", OPT_INT, &opt.acctg_freq, NULL }, {NULL, 0, NULL, NULL} }; @@ -521,6 +525,7 @@ void set_options(const int argc, char **argv) {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, + {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, {NULL, 0, 0, 0} }; char *opt_string = "+a:B:c:C:d:F:g:hHIJ:kK:m:n:N:Op:qR:st:uU:vVw:W:x:"; @@ -864,6 +869,9 @@ void set_options(const int argc, char **argv) xfree(opt.ramdiskimage); opt.ramdiskimage = xstrdup(optarg); break; + case LONG_OPT_ACCTG_FREQ: + opt.acctg_freq = _get_int(optarg, "acctg-freq"); + break; default: fatal("Unrecognized command line parameter %c", opt_char); diff --git 
a/src/salloc/opt.h b/src/salloc/opt.h index bdf773d92488d04c515828a285198b8949a56145..769613592cd134b422bae1028313d88e1a28af12 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -92,6 +92,7 @@ typedef struct salloc_options { bool hold; /* --hold, -H */ bool no_kill; /* --no-kill, -k */ + int acctg_freq; /* --acctg-freq=secs */ bool overcommit; /* --overcommit -O */ int kill_command_signal;/* --kill-command, -K */ bool kill_command_signal_set; diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index 6bd5fdb7b2ffbdfefe2d75e2723893efcd47e754..479cf370414d59658fa95e4ed1261c51d0717426 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -192,6 +192,10 @@ int main(int argc, char *argv[]) env_array_append_fmt(&env, "SLURM_OVERCOMMIT", "%d", opt.overcommit); } + if (opt.acctg_freq >= 0) { + env_array_append_fmt(&env, "SLURM_ACCTG_FREQ", "%d", + opt.acctg_freq); + } env_array_set_environment(env); env_array_free(env); pthread_mutex_lock(&allocation_state_lock); diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 5d29cabf976f916999ac37fe2922a2871a339980..58e7b1494776b07e73128752838a94078ad450d7 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -1253,7 +1253,7 @@ static void _set_options(int argc, char **argv) } break; case LONG_OPT_ACCTG_FREQ: - opt.acctg_freq = _get_int(optarg, "nice"); + opt.acctg_freq = _get_int(optarg, "acctg-freq"); break; default: fatal("Unrecognized command line parameter %c",