From 46783aa726f659e8d97a229fc13c8c5eb80e9645 Mon Sep 17 00:00:00 2001 From: Don Lipari <lipari1@llnl.gov> Date: Mon, 31 Aug 2009 21:41:13 +0000 Subject: [PATCH] Initial work to add qos support to salloc/sbatch/srun --- NEWS | 1 + slurm/slurm.h.in | 2 + slurm/slurm_errno.h | 1 + src/common/slurm_errno.c | 2 + src/common/slurm_protocol_pack.c | 3 + src/salloc/opt.c | 83 ++++++++++++----------- src/salloc/opt.h | 2 +- src/salloc/salloc.c | 2 + src/sbatch/opt.c | 83 ++++++++++++----------- src/sbatch/opt.h | 2 +- src/sbatch/sbatch.c | 2 + src/scancel/opt.c | 17 +++-- src/scancel/scancel.c | 6 ++ src/scancel/scancel.h | 1 + src/slurmctld/job_mgr.c | 2 +- src/srun/allocate.c | 3 + src/srun/opt.c | 109 +++++++++++++++++-------------- src/srun/opt.h | 2 +- 18 files changed, 188 insertions(+), 135 deletions(-) diff --git a/NEWS b/NEWS index df7d322518b..e214f7edbab 100644 --- a/NEWS +++ b/NEWS @@ -77,6 +77,7 @@ documents those changes that are of interest to users and admins. -- BLUEGENE - env vars such as SLURM_NNODES, SLURM_JOB_NUM_NODES, and SLURM_JOB_CPUS_PER_NODE now reference cnode counts instead of midplane counts. SLURM_NODELIST still references midplane names. + -- Added qos support to salloc/sbatch/srun * Changes in SLURM 2.1.0-pre2 ============================= diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index e3905c9dde5..133ffbd4e02 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -578,6 +578,7 @@ typedef struct job_descriptor { /* For submit, allocate, and update requests */ uint32_t priority; /* relative priority of the job, * explicitly set only for user root, * 0 == held (don't initiate) */ + char *qos; /* Quality of Service */ char *resp_host; /* NOTE: Set by slurmctld */ char *req_nodes; /* comma separated list of required nodes * default NONE */ @@ -706,6 +707,7 @@ typedef struct job_info { time_t pre_sus_time; /* time job ran prior to last suspend */ uint32_t priority; /* relative priority of the job, * 0=held, 1=required nodes DOWN/DRAINED */ + char *qos; /* Quality of Service */ char *req_nodes; /* comma separated list of required nodes */ int *req_node_inx; /* required list index pairs into node_table: * start_range_1, end_range_1, diff --git a/slurm/slurm_errno.h b/slurm/slurm_errno.h index c57a251be8a..93605fe16ff 100644 --- a/slurm/slurm_errno.h +++ b/slurm/slurm_errno.h @@ -175,6 +175,7 @@ enum { ESLURM_INVALID_BLOCK_STATE, ESLURM_INVALID_BLOCK_LAYOUT, ESLURM_INVALID_BLOCK_NAME, + ESLURM_INVALID_QOS, /* switch specific error codes, specific values defined in plugin module */ ESLURM_SWITCH_MIN = 3000, diff --git a/src/common/slurm_errno.c b/src/common/slurm_errno.c index 5b5a2b226b5..55171b69415 100644 --- a/src/common/slurm_errno.c +++ b/src/common/slurm_errno.c @@ -214,6 +214,8 @@ static slurm_errtab_t slurm_errtab[] = { "Memory required by task is not available" }, { ESLURM_INVALID_ACCOUNT, "Job has invalid account" }, + { ESLURM_INVALID_QOS, + "Job has invalid qos" }, { ESLURM_INVALID_WCKEY, "Job has invalid wckey" }, { ESLURM_INVALID_LICENSES, diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index ab9072ac13d..f7c24917e35 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -2805,6 +2805,7 @@ _unpack_job_info_members(job_info_t * job, Buf buffer) safe_unpackstr_xmalloc(&job->account, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->network, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->comment, &uint32_tmp, buffer); + safe_unpackstr_xmalloc(&job->qos, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->licenses, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->state_desc, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job->resv_name, &uint32_tmp, buffer); @@ -3354,6 +3355,7 @@ _pack_job_desc_msg(job_desc_msg_t * job_desc_ptr, Buf buffer) packstr(job_desc_ptr->account, buffer); packstr(job_desc_ptr->comment, buffer); pack16(job_desc_ptr->nice, buffer); + packstr(job_desc_ptr->qos, buffer); pack8(job_desc_ptr->open_mode, buffer); pack8(job_desc_ptr->overcommit, buffer); @@ -3508,6 +3510,7 @@ _unpack_job_desc_msg(job_desc_msg_t ** job_desc_buffer_ptr, Buf buffer) safe_unpackstr_xmalloc(&job_desc_ptr->account, &uint32_tmp, buffer); safe_unpackstr_xmalloc(&job_desc_ptr->comment, &uint32_tmp, buffer); safe_unpack16(&job_desc_ptr->nice, buffer); + safe_unpackstr_xmalloc(&job_desc_ptr->qos, &uint32_tmp, buffer); safe_unpack8(&job_desc_ptr->open_mode, buffer); safe_unpack8(&job_desc_ptr->overcommit, buffer); diff --git a/src/salloc/opt.c b/src/salloc/opt.c index f621fe7adfd..b66be57d7e1 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -23,7 +23,7 @@ * * You should have received a copy of the GNU General Public License along * with SLURM; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ #if HAVE_CONFIG_H @@ -132,6 +132,7 @@ #define LONG_OPT_NOSHELL 0x124 #define LONG_OPT_GET_USER_ENV 0x125 #define LONG_OPT_NETWORK 0x126 +#define LONG_OPT_QOS 0x127 #define LONG_OPT_SOCKETSPERNODE 0x130 #define LONG_OPT_CORESPERSOCKET 0x131 #define LONG_OPT_THREADSPERCORE 0x132 @@ -289,6 +290,7 @@ static void _opt_default() opt.dependency = NULL; opt.account = NULL; opt.comment = NULL; + opt.qos = NULL; opt.distribution = SLURM_DIST_UNKNOWN; opt.plane_size = NO_VAL; @@ -576,49 +578,50 @@ void set_options(const int argc, char **argv) {"nodelist", required_argument, 0, 'w'}, {"wait", required_argument, 0, 'W'}, {"exclude", required_argument, 0, 'x'}, + {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, + {"begin", required_argument, 0, LONG_OPT_BEGIN}, + {"bell", no_argument, 0, LONG_OPT_BELL}, + {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, + {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, + {"comment", required_argument, 0, LONG_OPT_COMMENT}, + {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE}, {"contiguous", no_argument, 0, LONG_OPT_CONT}, + {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, + {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, {"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE}, + {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, + {"gid", required_argument, 0, LONG_OPT_GID}, + {"hint", required_argument, 0, LONG_OPT_HINT}, + {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, + {"jobid", required_argument, 0, LONG_OPT_JOBID}, + {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, + {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE}, + {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER}, + {"mem", required_argument, 0, LONG_OPT_MEM}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND}, + {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"mincpus", required_argument, 0, LONG_OPT_MINCPU}, {"minsockets", required_argument, 0, LONG_OPT_MINSOCKETS}, - {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, - {"mem", required_argument, 0, LONG_OPT_MEM}, - {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, - {"hint", required_argument, 0, LONG_OPT_HINT}, - {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, - {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, - {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE}, + {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, + {"network", required_argument, 0, LONG_OPT_NETWORK}, + {"nice", optional_argument, 0, LONG_OPT_NICE}, + {"no-bell", no_argument, 0, LONG_OPT_NO_BELL}, + {"no-shell", no_argument, 0, LONG_OPT_NOSHELL}, + {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE}, {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, - {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE}, + {"qos", required_argument, 0, LONG_OPT_QOS}, + {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, + {"reboot", no_argument, 0, LONG_OPT_REBOOT}, + {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, + {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, {"tasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, + {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE}, {"tmp", required_argument, 0, LONG_OPT_TMP}, {"uid", required_argument, 0, LONG_OPT_UID}, - {"gid", required_argument, 0, LONG_OPT_GID}, - {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE}, - {"begin", required_argument, 0, LONG_OPT_BEGIN}, - {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE}, - {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER}, - {"nice", optional_argument, 0, LONG_OPT_NICE}, - {"bell", no_argument, 0, LONG_OPT_BELL}, - {"no-bell", no_argument, 0, LONG_OPT_NO_BELL}, - {"jobid", required_argument, 0, LONG_OPT_JOBID}, - {"comment", required_argument, 0, LONG_OPT_COMMENT}, - {"reboot", no_argument, 0, LONG_OPT_REBOOT}, - {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, - {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, - {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, - {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, - {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, - {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, - {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, - {"no-shell", no_argument, 0, LONG_OPT_NOSHELL}, - {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, - {"network", required_argument, 0, LONG_OPT_NETWORK}, - {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, - {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND}, {"wckey", required_argument, 0, LONG_OPT_WCKEY}, - {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, {NULL, 0, 0, 0} }; char *opt_string = "+B:c:C:d:D:F:g:hHIJ:kK::L:m:n:N:Op:P:QRst:uU:vVw:W:x:"; @@ -928,6 +931,10 @@ void set_options(const int argc, char **argv) xfree(opt.comment); opt.comment = xstrdup(optarg); break; + case LONG_OPT_QOS: + xfree(opt.qos); + opt.qos = xstrdup(optarg); + break; case LONG_OPT_SOCKETSPERNODE: get_resource_arg_range( optarg, "sockets-per-node", &opt.min_sockets_per_node, @@ -1624,6 +1631,7 @@ static void _opt_list() info("comment : %s", opt.comment); info("dependency : %s", opt.dependency); info("network : %s", opt.network); + info("qos : %s", opt.qos); str = print_constraints(); info("constraints : %s", str); xfree(str); @@ -1704,7 +1712,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--bell] [--no-bell] [--kill-command[=signal]]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" -" [--network=type] [--mem-per-cpu=MB]\n" +" [--network=type] [--mem-per-cpu=MB] [--qos=qos]\n" " [--cpu_bind=...] [--mem_bind=...] [--reservation=name]\n" " [executable [args...]]\n"); } @@ -1742,13 +1750,14 @@ static void _help(void) " --ntasks-per-node=n number of tasks to invoke on each node\n" " -N, --nodes=N number of nodes on which to run (N = min[-max])\n" " -O, --overcommit overcommit resources\n" -" -Q, --quiet quiet mode (suppress informational messages)\n" " -p, --partition=partition partition requested\n" " -P, --dependency=type:jobid defer job until condition on jobid is satisfied\n" +" --qos=qos quality of service\n" +" -Q, --quiet quiet mode (suppress informational messages)\n" " -s, --share share nodes with other jobs\n" " -t, --time=minutes time limit\n" -" -U, --account=name charge job to specified account\n" " --uid=user_id user ID to run job as (user root only)\n" +" -U, --account=name charge job to specified account\n" " -v, --verbose verbose mode (multiple -v's increase verbosity)\n" "\n" "Constraint options:\n" @@ -1756,9 +1765,9 @@ static void _help(void) " -C, --constraint=list specify a list of constraints\n" " -F, --nodefile=filename request a specific list of hosts\n" " --mem=MB minimum amount of real memory\n" +" --mincores=n minimum number of cores per socket\n" " --mincpus=n minimum number of logical processors (threads) per node\n" " --minsockets=n minimum number of sockets per node\n" -" --mincores=n minimum number of cores per socket\n" " --minthreads=n minimum number of threads per core\n" " --reservation=name allocate resources from named reservation\n" " --tmp=MB minimum amount of temporary disk\n" diff --git a/src/salloc/opt.h b/src/salloc/opt.h index 1772de9dc7c..0e1cb3bd21e 100644 --- a/src/salloc/opt.h +++ b/src/salloc/opt.h @@ -96,7 +96,7 @@ typedef struct salloc_options { int nice; /* --nice */ char *account; /* --account, -U acct_name */ char *comment; /* --comment */ - + char *qos; /* --qos */ int immediate; /* -I, --immediate */ bool hold; /* --hold, -H */ diff --git a/src/salloc/salloc.c b/src/salloc/salloc.c index a87e28d014e..01d1c7d2f10 100644 --- a/src/salloc/salloc.c +++ b/src/salloc/salloc.c @@ -432,6 +432,8 @@ static int _fill_job_desc_from_opts(job_desc_msg_t *desc) desc->account = xstrdup(opt.account); if (opt.comment) desc->comment = xstrdup(opt.comment); + if (opt.qos) + desc->qos = xstrdup(opt.qos); if (opt.hold) desc->priority = 0; diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 9f163f34e9a..1c41271c284 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -23,7 +23,7 @@ * * You should have received a copy of the GNU General Public License along * with SLURM; if not, write to the Free Software Foundation, Inc., - * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA. + * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. \*****************************************************************************/ #if HAVE_CONFIG_H @@ -127,6 +127,7 @@ #define LONG_OPT_WRAP 0x118 #define LONG_OPT_REQUEUE 0x119 #define LONG_OPT_NETWORK 0x120 +#define LONG_OPT_QOS 0x127 #define LONG_OPT_SOCKETSPERNODE 0x130 #define LONG_OPT_CORESPERSOCKET 0x131 #define LONG_OPT_THREADSPERCORE 0x132 @@ -293,6 +294,7 @@ static void _opt_default() opt.dependency = NULL; opt.account = NULL; opt.comment = NULL; + opt.qos = NULL; opt.distribution = SLURM_DIST_UNKNOWN; opt.plane_size = NO_VAL; @@ -627,53 +629,54 @@ static struct option long_options[] = { {"version", no_argument, 0, 'V'}, {"nodelist", required_argument, 0, 'w'}, {"exclude", required_argument, 0, 'x'}, + {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, + {"begin", required_argument, 0, LONG_OPT_BEGIN}, + {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, + {"checkpoint", required_argument, 0, LONG_OPT_CHECKPOINT}, + {"checkpoint-dir",required_argument, 0, LONG_OPT_CHECKPOINT_DIR}, + {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, + {"comment", required_argument, 0, LONG_OPT_COMMENT}, + {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE}, {"contiguous", no_argument, 0, LONG_OPT_CONT}, + {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, + {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, {"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE}, - {"mincpus", required_argument, 0, LONG_OPT_MINCPU}, - {"minsockets", required_argument, 0, LONG_OPT_MINSOCKETS}, - {"mincores", required_argument, 0, LONG_OPT_MINCORES}, - {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, - {"mem", required_argument, 0, LONG_OPT_MEM}, - {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, + {"gid", required_argument, 0, LONG_OPT_GID}, {"hint", required_argument, 0, LONG_OPT_HINT}, - {"tmp", required_argument, 0, LONG_OPT_TMP}, + {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"jobid", required_argument, 0, LONG_OPT_JOBID}, - {"uid", required_argument, 0, LONG_OPT_UID}, - {"gid", required_argument, 0, LONG_OPT_GID}, - {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE}, - {"begin", required_argument, 0, LONG_OPT_BEGIN}, + {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE}, {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER}, + {"mem", required_argument, 0, LONG_OPT_MEM}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, + {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND}, + {"mincores", required_argument, 0, LONG_OPT_MINCORES}, + {"mincpus", required_argument, 0, LONG_OPT_MINCPU}, + {"minsockets", required_argument, 0, LONG_OPT_MINSOCKETS}, + {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, + {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, + {"network", required_argument, 0, LONG_OPT_NETWORK}, {"nice", optional_argument, 0, LONG_OPT_NICE}, {"no-requeue", no_argument, 0, LONG_OPT_NO_REQUEUE}, - {"requeue", no_argument, 0, LONG_OPT_REQUEUE}, - {"comment", required_argument, 0, LONG_OPT_COMMENT}, - {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, - {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, - {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE}, + {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE}, {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, - {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE}, - {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, - {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, - {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, - {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, + {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, + {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, + {"qos", required_argument, 0, LONG_OPT_QOS}, {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, - {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, {"reboot", no_argument, 0, LONG_OPT_REBOOT}, + {"requeue", no_argument, 0, LONG_OPT_REQUEUE}, + {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, + {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, {"tasks-per-node",required_argument, 0, LONG_OPT_NTASKSPERNODE}, - {"wrap", required_argument, 0, LONG_OPT_WRAP}, - {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, - {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, - {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, - {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, - {"network", required_argument, 0, LONG_OPT_NETWORK}, - {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, - {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND}, + {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE}, + {"tmp", required_argument, 0, LONG_OPT_TMP}, + {"uid", required_argument, 0, LONG_OPT_UID}, {"wckey", required_argument, 0, LONG_OPT_WCKEY}, - {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, - {"checkpoint", required_argument, 0, LONG_OPT_CHECKPOINT}, - {"checkpoint-dir",required_argument, 0, LONG_OPT_CHECKPOINT_DIR}, + {"wrap", required_argument, 0, LONG_OPT_WRAP}, {NULL, 0, 0, 0} }; @@ -1358,6 +1361,10 @@ static void _set_options(int argc, char **argv) xfree(opt.comment); opt.comment = xstrdup(optarg); break; + case LONG_OPT_QOS: + xfree(opt.qos); + opt.qos = xstrdup(optarg); + break; case LONG_OPT_SOCKETSPERNODE: get_resource_arg_range( optarg, "sockets-per-node", &opt.min_sockets_per_node, @@ -2445,6 +2452,7 @@ static void _opt_list() info("account : %s", opt.account); info("comment : %s", opt.comment); info("dependency : %s", opt.dependency); + info("qos : %s", opt.qos); str = print_constraints(); info("constraints : %s", str); xfree(str); @@ -2529,7 +2537,7 @@ static void _usage(void) " [--mail-type=type] [--mail-user=user][--nice[=value]]\n" " [--requeue] [--no-requeue] [--ntasks-per-node=n] [--propagate]\n" " [--nodefile=file] [--nodelist=hosts] [--exclude=hosts]\n" -" [--network=type] [--mem-per-cpu=MB]\n" +" [--network=type] [--mem-per-cpu=MB] [--qos=qos]\n" " [--cpu_bind=...] [--mem_bind=...] [--reservation=name]\n" " executable [args...]\n"); } @@ -2571,6 +2579,7 @@ static void _help(void) " -p, --partition=partition partition requested\n" " --propagate[=rlimits] propagate all [or specific list of] rlimits\n" " -P, --dependency=type:jobid defer job until condition on jobid is satisfied\n" +" --qos=qos quality of service\n" " -Q, --quiet quiet mode (suppress informational messages)\n" " --requeue if set, permit the job to be requeued\n" " -t, --time=minutes time limit\n" @@ -2580,16 +2589,16 @@ static void _help(void) " -v, --verbose verbose mode (multiple -v's increase verbosity)\n" "\n" "Constraint options:\n" +" --contiguous demand a contiguous range of nodes\n" " -C, --constraint=list specify a list of constraints\n" " -F, --nodefile=filename request a specific list of hosts\n" " --mem=MB minimum amount of real memory\n" +" --mincores=n minimum number of cores per socket\n" " --mincpus=n minimum number of logical processors (threads) per node\n" " --minsockets=n minimum number of sockets per node\n" -" --mincores=n minimum number of cores per socket\n" " --minthreads=n minimum number of threads per core\n" " --reservation=name allocate resources from named reservation\n" " --tmp=MB minimum amount of temporary disk\n" -" --contiguous demand a contiguous range of nodes\n" " -w, --nodelist=hosts... request a specific list of hosts\n" " -x, --exclude=hosts... exclude a specific list of hosts\n" "\n" diff --git a/src/sbatch/opt.h b/src/sbatch/opt.h index dde2d8f1bd2..4515207efee 100644 --- a/src/sbatch/opt.h +++ b/src/sbatch/opt.h @@ -100,7 +100,7 @@ typedef struct sbatch_options { char *account; /* --account, -U acct_name */ char *comment; /* --comment */ char *propagate; /* --propagate[=RLIMIT_CORE,...]*/ - + char *qos; /* --qos */ int immediate; /* -i, --immediate */ bool hold; /* --hold, -H */ diff --git a/src/sbatch/sbatch.c b/src/sbatch/sbatch.c index df770488cf7..9dd518d5f14 100644 --- a/src/sbatch/sbatch.c +++ b/src/sbatch/sbatch.c @@ -211,6 +211,8 @@ static int fill_job_desc_from_opts(job_desc_msg_t *desc) desc->account = xstrdup(opt.account); if (opt.comment) desc->comment = xstrdup(opt.comment); + if (opt.qos) + desc->qos = xstrdup(opt.qos); if (opt.hold) desc->priority = 0; diff --git a/src/scancel/opt.c b/src/scancel/opt.c index ae773b402d9..c068e907225 100644 --- a/src/scancel/opt.c +++ b/src/scancel/opt.c @@ -120,7 +120,7 @@ static void _print_version (void); static void _xlate_job_step_ids(char **rest); /* translate job state name to number */ -static uint16_t _xlate_state_name(const char *state_name); +static uint16_t _xlate_state_name(const char *state_name, bool env_var); /* translate name name to number */ static uint16_t _xlate_signal_name(const char *signal_name); @@ -151,7 +151,7 @@ int initialize_and_process_args(int argc, char *argv[]) } static uint16_t -_xlate_state_name(const char *state_name) +_xlate_state_name(const char *state_name, bool env_var) { enum job_states i; char *state_names; @@ -175,7 +175,12 @@ _xlate_state_name(const char *state_name) return JOB_CONFIGURING; } - fprintf (stderr, "Invalid job state specified: %s\n", state_name); + if (env_var) + fprintf(stderr, "Unrecognized SCANCEL_STATE value: %s\n", + state_name); + else + fprintf(stderr, "Invalid job state specified: %s\n", + state_name); state_names = xstrdup(job_state_string(0)); for (i=1; i<JOB_END; i++) { xstrcat(state_names, ","); @@ -293,9 +298,7 @@ static void _opt_env() } if ( (val=getenv("SCANCEL_STATE")) ) { - opt.state = true; - error ("Unrecognized SCANCEL_STATE value: %s", - val); + opt.state = _xlate_state_name(val, true); } if ( (val=getenv("SCANCEL_USER")) ) { @@ -378,7 +381,7 @@ static void _opt_args(int argc, char **argv) opt.signal = _xlate_signal_name(optarg); break; case (int)'t': - opt.state = _xlate_state_name(optarg); + opt.state = _xlate_state_name(optarg, false); break; case (int)'u': opt.user_name = xstrdup(optarg); diff --git a/src/scancel/scancel.c b/src/scancel/scancel.c index 665ceabfa27..b571b9ffec5 100644 --- a/src/scancel/scancel.c +++ b/src/scancel/scancel.c @@ -188,6 +188,12 @@ _filter_job_records (void) continue; } + if (opt.account != NULL && + (strcmp(job_ptr[i].account, opt.account) != 0)) { + job_ptr[i].job_id = 0; + continue; + } + if (opt.job_name != NULL && (strcmp(job_ptr[i].name, opt.job_name) != 0)) { job_ptr[i].job_id = 0; diff --git a/src/scancel/scancel.h b/src/scancel/scancel.h index 1d062667720..e9301c00566 100644 --- a/src/scancel/scancel.h +++ b/src/scancel/scancel.h @@ -44,6 +44,7 @@ #endif typedef struct scancel_options { + char *account; /* --account= */ bool batch; /* --batch, -b */ bool ctld; /* --ctld */ bool interactive; /* --interactive, -i */ diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 03be613cbb9..89e748bd258 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -4158,7 +4158,7 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer) packstr(dump_job_ptr->comment, buffer); slurm_mutex_lock(&assoc_mgr_qos_lock); - packstr(acct_qos_str(assoc_mgr_qos_list, dump_job_ptr->qos)); + packstr(acct_qos_str(assoc_mgr_qos_list, dump_job_ptr->qos), buffer); slurm_mutex_unlock(&assoc_mgr_qos_lock); packstr(dump_job_ptr->licenses, buffer); diff --git a/src/srun/allocate.c b/src/srun/allocate.c index 2ba5df0255a..091bea2dd80 100644 --- a/src/srun/allocate.c +++ b/src/srun/allocate.c @@ -604,6 +604,8 @@ job_desc_msg_create_from_opts () j->account = xstrdup(opt.account); if (opt.comment) j->comment = xstrdup(opt.comment); + if (opt.qos) + j->qos = xstrdup(opt.qos); if (opt.hold) j->priority = 0; @@ -693,6 +695,7 @@ job_desc_msg_destroy(job_desc_msg_t *j) if (j) { xfree(j->account); xfree(j->comment); + xfree(j->qos); xfree(j); } } diff --git a/src/srun/opt.c b/src/srun/opt.c index 25121fefa9a..0ffdc7f130b 100644 --- a/src/srun/opt.c +++ b/src/srun/opt.c @@ -153,6 +153,7 @@ #define LONG_OPT_MEM_BIND 0x120 #define LONG_OPT_MULTI 0x122 #define LONG_OPT_COMMENT 0x124 +#define LONG_OPT_QOS 0x127 #define LONG_OPT_SOCKETSPERNODE 0x130 #define LONG_OPT_CORESPERSOCKET 0x131 #define LONG_OPT_THREADSPERCORE 0x132 @@ -351,6 +352,7 @@ static void _opt_default() opt.dependency = NULL; opt.account = NULL; opt.comment = NULL; + opt.qos = NULL; opt.distribution = SLURM_DIST_UNKNOWN; opt.plane_size = NO_VAL; @@ -737,67 +739,68 @@ static void set_options(const int argc, char **argv) {"exclude", required_argument, 0, 'x'}, {"disable-status", no_argument, 0, 'X'}, {"no-allocate", no_argument, 0, 'Z'}, + {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, + {"begin", required_argument, 0, LONG_OPT_BEGIN}, + {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, + {"checkpoint", required_argument, 0, LONG_OPT_CHECKPOINT}, + {"checkpoint-dir", required_argument, 0, LONG_OPT_CHECKPOINT_DIR}, + {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, + {"comment", required_argument, 0, LONG_OPT_COMMENT}, + {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE}, {"contiguous", no_argument, 0, LONG_OPT_CONT}, - {"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE}, + {"core", required_argument, 0, LONG_OPT_CORE}, + {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, {"cpu_bind", required_argument, 0, LONG_OPT_CPU_BIND}, + {"debugger-test", no_argument, 0, LONG_OPT_DEBUG_TS}, + {"epilog", required_argument, 0, LONG_OPT_EPILOG}, + {"exclusive", no_argument, 0, LONG_OPT_EXCLUSIVE}, + {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, + {"gid", required_argument, 0, LONG_OPT_GID}, + {"help", no_argument, 0, LONG_OPT_HELP}, + {"hint", required_argument, 0, LONG_OPT_HINT}, + {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, + {"jobid", required_argument, 0, LONG_OPT_JOBID}, + {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, + {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE}, + {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER}, + {"max-exit-timeout", required_argument, 0, LONG_OPT_XTO}, + {"max-launch-time", required_argument, 0, LONG_OPT_LAUNCH}, + {"mem", required_argument, 0, LONG_OPT_MEM}, + {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, {"mem_bind", required_argument, 0, LONG_OPT_MEM_BIND}, - {"core", required_argument, 0, LONG_OPT_CORE}, + {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"mincpus", required_argument, 0, LONG_OPT_MINCPUS}, {"minsockets", required_argument, 0, LONG_OPT_MINSOCKETS}, - {"mincores", required_argument, 0, LONG_OPT_MINCORES}, {"minthreads", required_argument, 0, LONG_OPT_MINTHREADS}, - {"mem", required_argument, 0, LONG_OPT_MEM}, - {"mem-per-cpu", required_argument, 0, LONG_OPT_MEM_PER_CPU}, - {"hint", required_argument, 0, LONG_OPT_HINT}, + {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, {"mpi", required_argument, 0, LONG_OPT_MPI}, - {"resv-ports", optional_argument, 0, LONG_OPT_RESV_PORTS}, - {"tmp", required_argument, 0, LONG_OPT_TMP}, - {"jobid", required_argument, 0, LONG_OPT_JOBID}, {"msg-timeout", required_argument, 0, LONG_OPT_TIMEO}, - {"max-launch-time", required_argument, 0, LONG_OPT_LAUNCH}, - {"max-exit-timeout", required_argument, 0, LONG_OPT_XTO}, - {"uid", required_argument, 0, LONG_OPT_UID}, - {"gid", required_argument, 0, LONG_OPT_GID}, - {"debugger-test", no_argument, 0, LONG_OPT_DEBUG_TS}, - {"help", no_argument, 0, LONG_OPT_HELP}, - {"usage", no_argument, 0, LONG_OPT_USAGE}, - {"conn-type", required_argument, 0, LONG_OPT_CONNTYPE}, - {"test-only", no_argument, 0, LONG_OPT_TEST_ONLY}, + {"multi-prog", no_argument, 0, LONG_OPT_MULTI}, {"network", required_argument, 0, LONG_OPT_NETWORK}, - {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, - {"prolog", required_argument, 0, LONG_OPT_PROLOG}, - {"epilog", required_argument, 0, LONG_OPT_EPILOG}, - {"begin", required_argument, 0, LONG_OPT_BEGIN}, - {"mail-type", required_argument, 0, LONG_OPT_MAIL_TYPE}, - {"mail-user", required_argument, 0, LONG_OPT_MAIL_USER}, - {"task-prolog", required_argument, 0, LONG_OPT_TASK_PROLOG}, - {"task-epilog", required_argument, 0, LONG_OPT_TASK_EPILOG}, {"nice", optional_argument, 0, LONG_OPT_NICE}, - {"multi-prog", no_argument, 0, LONG_OPT_MULTI}, - {"comment", required_argument, 0, LONG_OPT_COMMENT}, - {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, - {"cores-per-socket", required_argument, 0, LONG_OPT_CORESPERSOCKET}, - {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE}, + {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE}, {"ntasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, {"ntasks-per-socket",required_argument, 0, LONG_OPT_NTASKSPERSOCKET}, - {"ntasks-per-core", required_argument, 0, LONG_OPT_NTASKSPERCORE}, - {"tasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, - {"blrts-image", required_argument, 0, LONG_OPT_BLRTS_IMAGE}, - {"linux-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, - {"cnload-image", required_argument, 0, LONG_OPT_LINUX_IMAGE}, - {"mloader-image", required_argument, 0, LONG_OPT_MLOADER_IMAGE}, - {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, - {"ioload-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, - {"reboot", no_argument, 0, LONG_OPT_REBOOT}, - {"get-user-env", optional_argument, 0, LONG_OPT_GET_USER_ENV}, - {"pty", no_argument, 0, LONG_OPT_PTY}, - {"checkpoint", required_argument, 0, LONG_OPT_CHECKPOINT}, - {"checkpoint-dir", required_argument, 0, LONG_OPT_CHECKPOINT_DIR}, {"open-mode", required_argument, 0, LONG_OPT_OPEN_MODE}, - {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, - {"wckey", required_argument, 0, LONG_OPT_WCKEY}, + {"prolog", required_argument, 0, LONG_OPT_PROLOG}, + {"propagate", optional_argument, 0, LONG_OPT_PROPAGATE}, + {"pty", no_argument, 0, LONG_OPT_PTY}, + {"qos", required_argument, 0, LONG_OPT_QOS}, + {"ramdisk-image", required_argument, 0, LONG_OPT_RAMDISK_IMAGE}, + {"reboot", no_argument, 0, LONG_OPT_REBOOT}, {"reservation", required_argument, 0, LONG_OPT_RESERVATION}, {"restart-dir", required_argument, 0, LONG_OPT_RESTART_DIR}, + {"resv-ports", optional_argument, 0, LONG_OPT_RESV_PORTS}, + {"sockets-per-node", required_argument, 0, LONG_OPT_SOCKETSPERNODE}, + {"task-epilog", required_argument, 0, LONG_OPT_TASK_EPILOG}, + {"task-prolog", required_argument, 0, LONG_OPT_TASK_PROLOG}, + {"tasks-per-node", required_argument, 0, LONG_OPT_NTASKSPERNODE}, + {"test-only", no_argument, 0, LONG_OPT_TEST_ONLY}, + {"threads-per-core", required_argument, 0, LONG_OPT_THREADSPERCORE}, + {"tmp", required_argument, 0, LONG_OPT_TMP}, + {"uid", required_argument, 0, LONG_OPT_UID}, + {"usage", no_argument, 0, LONG_OPT_USAGE}, + {"wckey", required_argument, 0, LONG_OPT_WCKEY}, {NULL, 0, 0, 0} }; char *opt_string = "+aAbB:c:C:d:D:e:Eg:Hi:IjJ:kKlL:m:n:N:" @@ -1235,6 +1238,10 @@ static void set_options(const int argc, char **argv) xfree(opt.comment); opt.comment = xstrdup(optarg); break; + case LONG_OPT_QOS: + xfree(opt.qos); + opt.qos = xstrdup(optarg); + break; case LONG_OPT_SOCKETSPERNODE: get_resource_arg_range( optarg, "sockets-per-node", &opt.min_sockets_per_node, @@ -2057,6 +2064,7 @@ static void _opt_list() info("dependency : %s", opt.dependency); info("exclusive : %s", tf_(opt.exclusive)); + info("qos : %s", opt.qos); if (opt.shared != (uint16_t) NO_VAL) info("shared : %u", opt.shared); str = print_constraints(); @@ -2140,7 +2148,7 @@ static void _usage(void) " [--jobid=id] [--verbose] [--slurmd_debug=#]\n" " [--core=type] [-T threads] [-W sec] [--checkpoint=time]\n" " [--checkpoint-dir=dir] [--licenses=names]\n" -" [--restart-dir=dir]\n" +" [--restart-dir=dir] [--qos=qos]\n" " [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n" " [--mpi=type] [--account=name] [--dependency=type:jobid]\n" " [--kill-on-bad-exit] [--propagate[=rlimits] [--comment=name]\n" @@ -2219,6 +2227,7 @@ static void _help(void) #endif " -P, --dependency=type:jobid defer job until condition on jobid is satisfied\n" " -q, --quit-on-interrupt quit on single Ctrl-C\n" +" --qos=qos quality of service\n" " -Q, --quiet quiet mode (suppress informational messages)\n" " -r, --relative=n run job step relative to node n of allocation\n" " --restart-dir=dir directory of checkpoint image files to restart\n" @@ -2236,13 +2245,13 @@ static void _help(void) " -X, --disable-status Disable Ctrl-C status feature\n" "\n" "Constraint options:\n" -" -C, --constraint=list specify a list of constraints\n" " --contiguous demand a contiguous range of nodes\n" -" --mincpus=n minimum number of logical processors (threads) per node\n" +" -C, --constraint=list specify a list of constraints\n" +" --mem=MB minimum amount of real memory\n" " --mincores=n minimum number of cores per socket\n" +" --mincpus=n minimum number of logical processors (threads) per node\n" " --minsockets=n minimum number of sockets per node\n" " --minthreads=n minimum number of threads per core\n" -" --mem=MB minimum amount of real memory\n" " --reservation=name allocate resources from named reservation\n" " --tmp=MB minimum amount of temporary disk\n" " -w, --nodelist=hosts... request a specific list of hosts\n" diff --git a/src/srun/opt.h b/src/srun/opt.h index 58c8316cb24..3159d6e93f9 100644 --- a/src/srun/opt.h +++ b/src/srun/opt.h @@ -139,7 +139,7 @@ typedef struct srun_options { int nice; /* --nice */ char *account; /* --account, -U acct_name */ char *comment; /* --comment */ - + char *qos; /* --qos */ char *ofname; /* --output -o filename */ char *ifname; /* --input -i filename */ char *efname; /* --error, -e filename */ -- GitLab