From f5d252a8dc0420f4f9f88c352517af2f3d0d94e2 Mon Sep 17 00:00:00 2001 From: Don Lipari <lipari1@llnl.gov> Date: Tue, 1 Sep 2009 21:38:20 +0000 Subject: [PATCH] Completed work to add qos support to salloc/sbatch/srun --- doc/html/accounting.shtml | 5 ++++ doc/html/accounting_storageplugins.shtml | 4 +-- doc/html/resource_limits.shtml | 5 ++++ doc/man/man1/salloc.1 | 8 ++++++ doc/man/man1/sbatch.1 | 8 ++++++ doc/man/man1/srun.1 | 8 ++++++ src/api/job_info.c | 35 +++++++++++++++--------- src/slurmctld/job_mgr.c | 6 +++- 8 files changed, 63 insertions(+), 16 deletions(-) diff --git a/doc/html/accounting.shtml b/doc/html/accounting.shtml index b1c1031a330..7447c02352e 100644 --- a/doc/html/accounting.shtml +++ b/doc/html/accounting.shtml @@ -226,6 +226,11 @@ prevent users from accessing invalid accounts. <li>limits - This will enforce limits set to associations. By setting this option, the 'associations' option is also set. </li> +<li>qos - This will require all jobs to specify (either overtly or by +default) a valid qos (Quality of Service). QOS values are defined for +each association in the database. By setting this option, the +'associations' option is also set. +</li> <li>wckeys - This will prevent users from running jobs under a wckey that they don't have access to. By using this option, the 'associations' option is also set. The 'TrackWCKey' option is also diff --git a/doc/html/accounting_storageplugins.shtml b/doc/html/accounting_storageplugins.shtml index 7bdac1ae532..ebe7221031a 100644 --- a/doc/html/accounting_storageplugins.shtml +++ b/doc/html/accounting_storageplugins.shtml @@ -912,8 +912,8 @@ database with. <dt><span class="commandline">AccountingStorageEnforce</span> <dd>Specifies if we should enforce certain things be in existence before allowing job submissions and such valid options are - "associations, limits, and wckeys". You can use any combination of - those listed. + "associations, limits, qos, and wckeys". 
You can use any combination + of those listed. </dl> <h2>Versioning</h2> diff --git a/doc/html/resource_limits.shtml b/doc/html/resource_limits.shtml index afcb619afe1..a2e3469ffdf 100644 --- a/doc/html/resource_limits.shtml +++ b/doc/html/resource_limits.shtml @@ -52,6 +52,11 @@ prevent users from accessing invalid accounts. <li>limits - This will enforce limits set to associations. By setting this option, the 'associations' option is also set. </li> +<li>qos - This will require all jobs to specify (either overtly or by +default) a valid qos (Quality of Service). QOS values are defined for +each association in the database. By setting this option, the +'associations' option is also set. +</li> <li>wckeys - This will prevent users from running jobs under a wckey that they don't have access to. By using this option, the 'associations' option is also set. The 'TrackWCKey' option is also diff --git a/doc/man/man1/salloc.1 b/doc/man/man1/salloc.1 index 27129c3ab9b..32de207275e 100644 --- a/doc/man/man1/salloc.1 +++ b/doc/man/man1/salloc.1 @@ -709,6 +709,14 @@ partition as designated by the system administrator. \fB\-Q\fR, \fB\-\-quiet\fR Suppress informational messages from salloc. Errors will still be displayed. +.TP +\fB\-\-qos\fR=<\fIqos\fR> +Request a quality of service for the job. QOS values can be defined +for each user/cluster/account association in the SLURM database. +Users will be limited to their association's defined set of QOS values when +the SLURM configuration parameter, AccountingStorageEnforce, includes +"qos". + .TP \fB\-\-reservation\fR=<\fIname\fR> Allocate resources for the job from the named reservation. diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 4ba28c46128..e6b7234c4f8 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -796,6 +796,14 @@ The maximum stack size \fB\-Q\fR, \fB\-\-quiet\fR Suppress informational messages from sbatch. Errors will still be displayed. 
+.TP +\fB\-\-qos\fR=<\fIqos\fR> +Request a quality of service for the job. QOS values can be defined +for each user/cluster/account association in the SLURM database. +Users will be limited to their association's defined set of QOS values when +the SLURM configuration parameter, AccountingStorageEnforce, includes +"qos". + .TP \fB\-\-requeue\fR Specifies that the batch job should be requeued after node failure. diff --git a/doc/man/man1/srun.1 b/doc/man/man1/srun.1 index cd3266d164c..997f7d1f774 100644 --- a/doc/man/man1/srun.1 +++ b/doc/man/man1/srun.1 @@ -884,6 +884,14 @@ disables the status feature normally available when \fBsrun\fR receives a single Ctrl\-C and causes \fBsrun\fR to instead immediately terminate the running job. +.TP +\fB\-\-qos\fR=<\fIqos\fR> +Request a quality of service for the job. QOS values can be defined +for each user/cluster/account association in the SLURM database. +Users will be limited to their association's defined set of QOS values when +the SLURM configuration parameter, AccountingStorageEnforce, includes +"qos". + .TP \fB\-r\fR, \fB\-\-relative\fR=<\fIn\fR> Run a job step relative to node \fIn\fR of the current allocation. 
diff --git a/src/api/job_info.c b/src/api/job_info.c index a2679fe7b03..61bf810097b 100644 --- a/src/api/job_info.c +++ b/src/api/job_info.c @@ -522,9 +522,8 @@ line7: /****** Line 7 ******/ /****** Line 11 ******/ snprintf(tmp_line, sizeof(tmp_line), - "Dependency=%s Account=%s Requeue=%u Restarts=%u", - job_ptr->dependency, job_ptr->account, job_ptr->requeue, - job_ptr->restart_cnt); + "Dependency=%s Requeue=%u Restarts=%u", + job_ptr->dependency, job_ptr->requeue, job_ptr->restart_cnt); xstrcat(out, tmp_line); if (one_liner) xstrcat(out, " "); @@ -532,6 +531,16 @@ line7: /****** Line 7 ******/ xstrcat(out, "\n "); /****** Line 12 ******/ + snprintf(tmp_line, sizeof(tmp_line), + "Account=%s QOS=%s", + job_ptr->account, job_ptr->qos); + xstrcat(out, tmp_line); + if (one_liner) + xstrcat(out, " "); + else + xstrcat(out, "\n "); + + /****** Line 13 ******/ if (job_ptr->state_desc) { /* Replace white space with underscore for easier parsing */ for (j=0; job_ptr->state_desc[j]; j++) { @@ -550,7 +559,7 @@ line7: /****** Line 7 ******/ else xstrcat(out, "\n "); - /****** Line 13 ******/ + /****** Line 14 ******/ snprintf(tmp_line, sizeof(tmp_line), "Req%s=%s Req%sIndices=", nodelist, job_ptr->req_nodes, nodelist); xstrcat(out, tmp_line); @@ -567,7 +576,7 @@ line7: /****** Line 7 ******/ else xstrcat(out, "\n "); - /****** Line 14 ******/ + /****** Line 15 ******/ snprintf(tmp_line, sizeof(tmp_line), "Exc%s=%s Exc%sIndices=", nodelist, job_ptr->exc_nodes, nodelist); xstrcat(out, tmp_line); @@ -584,7 +593,7 @@ line7: /****** Line 7 ******/ else xstrcat(out, "\n "); - /****** Line 15 ******/ + /****** Line 16 ******/ slurm_make_time_str((time_t *)&job_ptr->submit_time, time_str, sizeof(time_str)); snprintf(tmp_line, sizeof(tmp_line), "SubmitTime=%s ", @@ -600,7 +609,7 @@ line7: /****** Line 7 ******/ time_str, (long int)job_ptr->pre_sus_time); xstrcat(out, tmp_line); - /****** Lines 16, 17 (optional, batch only) ******/ + /****** Lines 17, 18 (optional, batch only) ******/ 
if (job_ptr->batch_flag) { if (one_liner) xstrcat(out, " "); @@ -617,7 +626,7 @@ line7: /****** Line 7 ******/ xstrcat(out, tmp_line); } - /****** Line 18 (optional) ******/ + /****** Line 19 (optional) ******/ if (job_ptr->comment) { if (one_liner) xstrcat(out, " "); @@ -628,7 +637,7 @@ line7: /****** Line 7 ******/ xstrcat(out, tmp_line); } - /****** Line 19 (optional) ******/ + /****** Line 20 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MIXED); @@ -640,7 +649,7 @@ line7: /****** Line 7 ******/ xstrcat(out, select_buf); } #ifdef HAVE_BG - /****** Line 20 (optional) ******/ + /****** Line 21 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_BLRTS_IMAGE); @@ -653,7 +662,7 @@ line7: /****** Line 7 ******/ "BlrtsImage=%s", select_buf); xstrcat(out, tmp_line); } - /****** Line 21 (optional) ******/ + /****** Line 22 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_LINUX_IMAGE); @@ -671,7 +680,7 @@ line7: /****** Line 7 ******/ #endif xstrcat(out, tmp_line); } - /****** Line 22 (optional) ******/ + /****** Line 23 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_MLOADER_IMAGE); @@ -684,7 +693,7 @@ line7: /****** Line 7 ******/ "MloaderImage=%s", select_buf); xstrcat(out, tmp_line); } - /****** Line 23 (optional) ******/ + /****** Line 24 (optional) ******/ select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, select_buf, sizeof(select_buf), SELECT_PRINT_RAMDISK_IMAGE); diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 6db15a84cb6..534622158ee 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -4198,7 +4198,11 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer) packstr(dump_job_ptr->comment, buffer); 
slurm_mutex_lock(&assoc_mgr_qos_lock); - packstr(acct_qos_str(assoc_mgr_qos_list, dump_job_ptr->qos), buffer); + if (assoc_mgr_qos_list) + packstr(acct_qos_str(assoc_mgr_qos_list, dump_job_ptr->qos), + buffer); + else + packstr(NULL, buffer); slurm_mutex_unlock(&assoc_mgr_qos_lock); packstr(dump_job_ptr->licenses, buffer); -- GitLab