Skip to content
Snippets Groups Projects
Commit dad593bc authored by Moe Jette's avatar Moe Jette
Browse files

Add WaitTime, MaxJobCount and MinJobAge configuration parameters.

parent cca27310
No related branches found
No related tags found
No related merge requests found
.TH "slurm.conf" "5" "March 2003" "Morris Jette" "Slurm configuration file"
.TH "slurm.conf" "5" "July 2003" "Morris Jette" "Slurm configuration file"
.SH "NAME"
slurm.conf \- Slurm configuration file
.SH "DESCRIPTION"
......@@ -113,6 +113,20 @@ If the job fails to terminate gracefully
in the interval specified, it will be forcibly terminated.
The default value is 30 seconds.
.TP
\fBMaxJobCount\fR
The maximum number of jobs SLURM can have in its active database
at one time. Set the values of \fBMaxJobCount\fR and \fBMinJobAge\fR
to ensure the slurmctld daemon does not exhaust its memory or other
resources. Once this limit is reached, requests to submit additional
jobs will fail. The default value is 2000 jobs.
.TP
\fBMinJobAge\fR
The minimum age of a completed job before its record is purged from
SLURM's active database. Set the values of \fBMaxJobCount\fR and
\fBMinJobAge\fR to ensure the slurmctld daemon does not exhaust
its memory or other resources. The default value is 300 seconds.
A value of zero prevents any job record purging.
.TP
\fBPluginDir\fR
Identifies the places in which to look for SLURM plugins.
This is a colon-separated list of directories, like the PATH
......@@ -184,7 +198,9 @@ to for work. The default value is SLURMD_PORT as established at system
build time.
.TP
\fBSlurmdSpoolDir\fR
Fully qualified pathname of a file into which the \fBslurmd\fR daemon's state information is written. This must be a common pathname for all nodes, but should represent a file which is local to each node (reference a local file
Fully qualified pathname of a file into which the \fBslurmd\fR daemon's state
information is written. This must be a common pathname for all nodes, but
should represent a file which is local to each node (reference a local file
system). The default value is "/tmp/slurmd".
.TP
\fBSlurmdTimeout\fR
......@@ -202,7 +218,14 @@ into this directory.
.TP
\fBTmpFS\fR
Fully qualified pathname of the file system available to user jobs for
temporary storage. This parameter is used in establishing a node's \fBTmpDisk\fR space. The default value is "/tmp".
temporary storage. This parameter is used in establishing a node's \fBTmpDisk\fR space.
The default value is "/tmp".
.TP
\fBWaitTime\fR
Specifies how many seconds the srun command should by default wait after
the first task terminates before terminating all remaining tasks. The
"--wait" option on the srun command line overrides this value.
If set to 0, this feature is disabled.
.LP
The configuration of nodes (or machines) to be managed by Slurm is
also specified in \fB/etc/slurm.conf\fR.
......@@ -433,6 +456,10 @@ InactiveLimit=120
.br
KillWait=30
.br
MaxJobCount=10000
.br
MinJobAge=3600
.br
PluginDir=/usr/local/lib:/usr/local/slurm/lib
.br
Prioritize=/usr/local/maui/priority
......@@ -455,6 +482,8 @@ StateSaveLocation=/usr/local/slurm/slurm.state
.br
TmpFS=/tmp
.br
WaitTime=30
.br
JobCredentialPrivateKey=/usr/local/slurm/private.key
.br
JobCredentialPublicCertificate=/usr/local/slurm/public.cert
......
......@@ -161,17 +161,26 @@
# "InactiveLimit" : The interval, in seconds, a job is permitted to
# be inactive (no active job steps) before it is
# terminated.
#
# "MinJobAge" : The time, in seconds, after a job completes before
# its record is purged from the active slurmctld data.
#
# "KillWait" : The time, in seconds, between SIGTERM and SIGKILL
# signals sent to a job upon reaching its timelimit.
#
# "WaitTime" : Specifies how many seconds srun should wait after the
# first task terminates before terminating all remaining
# tasks. If set to 0, this feature is disabled.
#
# Example:
#
# SlurmctldTimeout=60 # Defaults to 300 seconds
# SlurmdTimeout=60 # Defaults to 300 seconds
# HeartBeatInterval=20 # Defaults to 30 seconds
# InactiveLimit=600 # Defaults to 0 (unlimited)
# MinJobAge=30 # Defaults to 300 seconds
# KillWait=10 # Defaults to 30 seconds
# WaitTime=30 # Defaults to 0 (disabled)
#
......@@ -195,6 +204,10 @@
# nodes will stay in the down state until manually
# brought into the IDLE state.
#
# "MaxJobCount" : defines the maximum number of jobs slurmctld can
# have in its active database at one time. Set the
# values of MaxJobCount and MinJobAge so as to avoid
# having slurmctld exhaust its memory or other resources.
# Example:
#
# FastSchedule=0 # default is `1'
......@@ -202,6 +215,7 @@
# HashBase=8 # default is `10'
# Prioritize=/etc/prio # default is none
# ReturnToService=1 # default is `0'
# MaxJobCount=10000 # Defaults to 2000
#
......
......@@ -97,6 +97,8 @@ BEGIN_C_DECLS
/* eg. the maximum count of nodes any job may use in some partition */
#define INFINITE (0xffffffff)
#define NO_VAL (0xfffffffe)
#define DEFAULT_KILL_WAIT 30
#define DEFAULT_WAIT_TIME 0
#define MAX_TASKS_PER_NODE 64
/* last entry must be JOB_END, keep in sync with job_state_string and
......@@ -366,6 +368,9 @@ typedef struct slurm_ctl_conf {
* inactive resource allocation is released */
uint16_t kill_wait; /* seconds between SIGXCPU to SIGKILL
* on job termination */
uint16_t max_job_cnt; /* maximum number of active jobs */
uint16_t min_job_age; /* COMPLETED jobs over this age (secs)
* purged from in memory records */
char *plugindir; /* pathname to plugins */
char *prioritize; /* pathname of program to set initial job
* priority */
......@@ -391,6 +396,7 @@ typedef struct slurm_ctl_conf {
char *state_save_location;/* pathname of slurmctld state save
* directory */
char *tmp_fs; /* pathname of temporary file system */
uint16_t wait_time; /* default job --wait time */
char *job_credential_private_key; /* path to private key */
char *job_credential_public_certificate;/* path to public certificate*/
} slurm_ctl_conf_t;
......
......@@ -80,6 +80,10 @@ void slurm_print_ctl_conf ( FILE* out,
slurm_ctl_conf_ptr->job_credential_public_certificate);
fprintf(out, "KillWait = %u\n",
slurm_ctl_conf_ptr->kill_wait);
fprintf(out, "MaxJobCnt = %u\n",
slurm_ctl_conf_ptr->max_job_cnt);
fprintf(out, "MinJobAge = %u\n",
slurm_ctl_conf_ptr->min_job_age);
fprintf(out, "PluginDir = %s\n",
slurm_ctl_conf_ptr->plugindir);
fprintf(out, "Prioritize = %s\n",
......@@ -119,6 +123,8 @@ void slurm_print_ctl_conf ( FILE* out,
slurm_ctl_conf_ptr->state_save_location);
fprintf(out, "TmpFS = %s\n",
slurm_ctl_conf_ptr->tmp_fs);
fprintf(out, "WaitTime = %u\n",
slurm_ctl_conf_ptr->wait_time);
}
/*
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment