From a40de01b12533e1881a067810882f4155b3bea07 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 6 Feb 2009 21:41:54 +0000 Subject: [PATCH] Add partition-specific DefaultTime (default time limit for jobs, if not specified use MaxTime for the partition). Patch from Par Andersson, National Supercomputer Centre, Sweden. --- NEWS | 5 ++++- doc/man/man1/sinfo.1 | 3 +++ doc/man/man5/slurm.conf.5 | 6 ++++++ slurm/slurm.h.in | 1 + src/api/init_msg.c | 1 + src/api/partition_info.c | 13 +++++++++++++ src/common/read_config.c | 17 +++++++++++++++++ src/common/read_config.h | 1 + src/common/slurm_protocol_pack.c | 3 +++ src/scontrol/update_part.c | 10 ++++++++++ src/sinfo/opts.c | 8 ++++++++ src/sinfo/print.c | 20 ++++++++++++++++++++ src/sinfo/print.h | 4 ++++ src/sinfo/sinfo.c | 4 ++++ src/sinfo/sinfo.h | 1 + src/slurmctld/job_mgr.c | 4 ++++ src/slurmctld/partition_mgr.c | 18 +++++++++++++++++- src/slurmctld/read_config.c | 10 +++++++++- src/slurmctld/slurmctld.h | 1 + 19 files changed, 127 insertions(+), 3 deletions(-) diff --git a/NEWS b/NEWS index c20bdac64eb..96196151b5d 100644 --- a/NEWS +++ b/NEWS @@ -11,7 +11,10 @@ documents those changes that are of interest to users and admins. -- Added sacctmgr command "show config". -- Added the scancel option --nodelist to cancel any jobs running on a given list of nodes. - + -- Add partition-specific DefaultTime (default time limit for jobs, + if not specified use MaxTime for the partition). Patch from Par + Andersson, National Supercomputer Centre, Sweden. + * Changes in SLURM 1.4.0-pre7 ============================= -- Bug fix for preemption with select/cons_res when there are no idle nodes. 
diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index d9ebfaaa014..2abff53dd32 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -153,6 +153,9 @@ Jobs may share nodes, "yes", "no", or "force" \fB%l\fR Maximum time for any job in the format "days\-hours:minutes:seconds" .TP +\fB%L\fR +Default time for any job in the format "days\-hours:minutes:seconds" +.TP \fB%m\fR Size of memory per node in megabytes .TP diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index e0d4e50fcf7..e72e4958f1f 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -1781,6 +1781,12 @@ Time resolution is one minute and second values are rounded up to the next minute. This limit does not apply to jobs executed by SlurmUser or user root. +.TP +\fBDefaultTime\fR +Run time limit used for jobs that don't specify a value. If not set +then MaxTime will be used. +Format is the same as for MaxTime. + .TP \fBMinNodes\fR Minimum count of nodes (or base partitions for BlueGene systems) which diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index 463bc5fd5fb..07fdfa3df3c 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -910,6 +910,7 @@ typedef struct partition_info { uint32_t max_nodes; /* per job or INFINITE */ uint16_t max_share; /* number of jobs to gang schedule */ uint32_t max_time; /* minutes or INFINITE */ + uint32_t default_time; /* minutes, NO_VAL or INFINITE */ uint32_t min_nodes; /* per job */ char *name; /* name of the partition */ int *node_inx; /* list index pairs into node_table: diff --git a/src/api/init_msg.c b/src/api/init_msg.c index e124032cd8a..9b07a660ccb 100644 --- a/src/api/init_msg.c +++ b/src/api/init_msg.c @@ -151,6 +151,7 @@ void slurm_init_part_desc_msg (update_part_msg_t * update_part_msg) update_part_msg->name = NULL; update_part_msg->nodes = NULL; update_part_msg->allow_groups = NULL; + update_part_msg->default_time = (uint32_t) NO_VAL; update_part_msg->max_time = (uint32_t) NO_VAL; update_part_msg->max_nodes 
= NO_VAL; update_part_msg->min_nodes = NO_VAL; diff --git a/src/api/partition_info.c b/src/api/partition_info.c index 9bc41dd767d..b83090ca0b3 100644 --- a/src/api/partition_info.c +++ b/src/api/partition_info.c @@ -237,6 +237,19 @@ char *slurm_sprint_partition_info ( partition_info_t * part_ptr, part_ptr->node_inx[j+1]); xstrcat(out, tmp_line); } + + if (part_ptr->default_time == INFINITE) + sprintf(tmp_line, " DefaultTime=UNLIMITED "); + else if (part_ptr->default_time == NO_VAL) + sprintf(tmp_line, " DefaultTime=NONE "); + else { + char time_line[32]; + secs2time_str(part_ptr->default_time * 60, time_line, + sizeof(time_line)); + sprintf(tmp_line, " DefaultTime=%s ", time_line); + } + xstrcat(out, tmp_line); + if (one_liner) xstrcat(out, "\n"); else diff --git a/src/common/read_config.c b/src/common/read_config.c index 24b4c2b4b88..beb52b6fbe2 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -478,6 +478,7 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type, static s_p_options_t _partition_options[] = { {"AllowGroups", S_P_STRING}, {"Default", S_P_BOOLEAN}, /* YES or NO */ + {"DefaultTime", S_P_STRING}, {"DisableRootJobs", S_P_BOOLEAN}, /* YES or NO */ {"Hidden", S_P_BOOLEAN}, /* YES or NO */ {"MaxTime", S_P_STRING}, @@ -543,6 +544,22 @@ static int parse_partitionname(void **dest, slurm_parser_enum_t type, xfree(tmp); } + if (!s_p_get_string(&tmp, "DefaultTime", tbl) && + !s_p_get_string(&tmp, "DefaultTime", dflt)) + p->default_time = NO_VAL; + else { + int default_time = time_str2mins(tmp); + if ((default_time < 0) && (default_time != INFINITE)) { + error("Bad value \"%s\" for DefaultTime", tmp); + destroy_partitionname(p); + s_p_hashtbl_destroy(tbl); + xfree(tmp); + return -1; + } + p->default_time = default_time; + xfree(tmp); + } + if (!s_p_get_uint32(&p->max_nodes, "MaxNodes", tbl) && !s_p_get_uint32(&p->max_nodes, "MaxNodes", dflt)) p->max_nodes = INFINITE; diff --git a/src/common/read_config.h 
b/src/common/read_config.h index 78584fbee8a..84005b663b8 100644 --- a/src/common/read_config.h +++ b/src/common/read_config.h @@ -148,6 +148,7 @@ typedef struct slurm_conf_partition { * default */ char *name; /* name of the partition */ bool hidden_flag; /* 1 if hidden by default */ + uint32_t default_time; /* minutes or INFINITE */ uint32_t max_time; /* minutes or INFINITE */ uint32_t max_nodes; /* per job or INFINITE */ uint32_t min_nodes; /* per job */ diff --git a/src/common/slurm_protocol_pack.c b/src/common/slurm_protocol_pack.c index 3a8967a58f2..8009126d15d 100644 --- a/src/common/slurm_protocol_pack.c +++ b/src/common/slurm_protocol_pack.c @@ -1753,6 +1753,7 @@ _pack_update_partition_msg(update_part_msg_t * msg, Buf buffer) packstr(msg->allow_groups, buffer); pack16(msg-> default_part, buffer); pack32(msg-> max_time, buffer); + pack32(msg-> default_time, buffer); pack32(msg-> max_nodes, buffer); pack32(msg-> min_nodes, buffer); packstr(msg->name, buffer); @@ -1780,6 +1781,7 @@ _unpack_update_partition_msg(update_part_msg_t ** msg, Buf buffer) safe_unpackstr_xmalloc(&tmp_ptr->allow_groups, &uint32_tmp, buffer); safe_unpack16(&tmp_ptr->default_part, buffer); safe_unpack32(&tmp_ptr->max_time, buffer); + safe_unpack32(&tmp_ptr->default_time, buffer); safe_unpack32(&tmp_ptr->max_nodes, buffer); safe_unpack32(&tmp_ptr->min_nodes, buffer); safe_unpackstr_xmalloc(&tmp_ptr->name, &uint32_tmp, buffer); @@ -2206,6 +2208,7 @@ _unpack_partition_info_members(partition_info_t * part, Buf buffer) if (part->name == NULL) part->name = xmalloc(1); /* part->name = "" implicit */ safe_unpack32(&part->max_time, buffer); + safe_unpack32(&part->default_time, buffer); safe_unpack32(&part->max_nodes, buffer); safe_unpack32(&part->min_nodes, buffer); safe_unpack32(&part->total_nodes, buffer); diff --git a/src/scontrol/update_part.c b/src/scontrol/update_part.c index c1fa0970467..b1d105b6970 100644 --- a/src/scontrol/update_part.c +++ b/src/scontrol/update_part.c @@ -70,6 +70,16 @@ 
scontrol_parse_part_options (int argc, char *argv[], int *update_cnt_ptr, part_msg_ptr->max_time = max_time; (*update_cnt_ptr)++; } + else if (strncasecmp(argv[i], "DefaultTime=", 12) == 0) { + int default_time = time_str2mins(&argv[i][12]); + if ((default_time < 0) && (default_time != INFINITE)) { + exit_code = 1; + error("Invalid input %s", argv[i]); + return -1; + } + part_msg_ptr->default_time = default_time; + (*update_cnt_ptr)++; + } else if (strncasecmp(argv[i], "MaxNodes=", 9) == 0) { if ((strcasecmp(&argv[i][9],"UNLIMITED") == 0) || (strcasecmp(&argv[i][8],"INFINITE") == 0)) diff --git a/src/sinfo/opts.c b/src/sinfo/opts.c index ff552fd7ad9..5f1d8195d77 100644 --- a/src/sinfo/opts.c +++ b/src/sinfo/opts.c @@ -509,6 +509,12 @@ _parse_format( char* format ) field_size, right_justify, suffix ); + } else if (field[0] == 'L') { + params.match_flags.default_time_flag = true; + format_add_default_time( params.format_list, + field_size, + right_justify, + suffix ); } else if (field[0] == 'm') { params.match_flags.memory_flag = true; format_add_memory( params.format_list, @@ -687,6 +693,8 @@ void _print_options( void ) printf("bg_flag = %s\n", params.bg_flag ? "true" : "false"); printf("cpus_flag = %s\n", params.match_flags.cpus_flag ? "true" : "false"); + printf("default_time_flag =%s\n", params.match_flags.default_time_flag ? + "true" : "false"); printf("disk_flag = %s\n", params.match_flags.disk_flag ? "true" : "false"); printf("features_flag = %s\n", params.match_flags.features_flag ? 
diff --git a/src/sinfo/print.c b/src/sinfo/print.c index fc0ae3c7317..d54c36272dd 100644 --- a/src/sinfo/print.c +++ b/src/sinfo/print.c @@ -805,6 +805,26 @@ int _print_time(sinfo_data_t * sinfo_data, int width, return SLURM_SUCCESS; } +int _print_default_time(sinfo_data_t * sinfo_data, int width, + bool right_justify, char *suffix) +{ + if (sinfo_data) { + if ((sinfo_data->part_info == NULL) || + (sinfo_data->part_info->default_time == NO_VAL)) + _print_str("n/a", width, right_justify, true); + else if (sinfo_data->part_info->default_time == INFINITE) + _print_str("infinite", width, right_justify, true); + else + _print_secs((sinfo_data->part_info->default_time * 60L), + width, right_justify, true); + } else + _print_str("DEFAULTTIME", width, right_justify, true); + + if (suffix) + printf("%s", suffix); + return SLURM_SUCCESS; +} + int _print_weight(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix) { diff --git a/src/sinfo/print.h b/src/sinfo/print.h index a7d01c8d580..e6d67d2aca3 100644 --- a/src/sinfo/print.h +++ b/src/sinfo/print.h @@ -116,6 +116,8 @@ int print_sinfo_list(List sinfo_list); format_add_function(list,wid,right,suffix,_print_state_long) #define format_add_time(list,wid,right,suffix) \ format_add_function(list,wid,right,suffix,_print_time) +#define format_add_default_time(list,wid,right,suffix) \ + format_add_function(list,wid,right,suffix,_print_default_time) #define format_add_weight(list,wid,right,suffix) \ format_add_function(list,wid,right,suffix,_print_weight) @@ -173,6 +175,8 @@ int _print_state_long(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix); int _print_time(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix); +int _print_default_time(sinfo_data_t * sinfo_data, int width, + bool right_justify, char *suffix); int _print_weight(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix); diff --git a/src/sinfo/sinfo.c b/src/sinfo/sinfo.c index 
83bfac9a6a9..6b45eabd553 100644 --- a/src/sinfo/sinfo.c +++ b/src/sinfo/sinfo.c @@ -650,6 +650,10 @@ static bool _match_part_data(sinfo_data_t *sinfo_ptr, (part_ptr->max_nodes != sinfo_ptr->part_info->max_nodes)) return false; + if (params.match_flags.default_time_flag && + (part_ptr->default_time != sinfo_ptr->part_info->default_time)) + return false; + if (params.match_flags.max_time_flag && (part_ptr->max_time != sinfo_ptr->part_info->max_time)) return false; diff --git a/src/sinfo/sinfo.h b/src/sinfo/sinfo.h index 69e35fd2518..a6eea6f9922 100644 --- a/src/sinfo/sinfo.h +++ b/src/sinfo/sinfo.h @@ -123,6 +123,7 @@ struct sinfo_match_flags { bool features_flag; bool groups_flag; bool job_size_flag; + bool default_time_flag; bool max_time_flag; bool memory_flag; bool partition_flag; diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index d7da8d85abf..8d54d9ea7df 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2120,6 +2120,10 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, return error_code; } + if ((job_desc->time_limit == NO_VAL) && + (part_ptr->default_time != NO_VAL)) + job_desc->time_limit = part_ptr->default_time; + if ((job_desc->time_limit != NO_VAL) && (job_desc->time_limit > part_ptr->max_time) && slurmctld_conf.enforce_part_limits) { diff --git a/src/slurmctld/partition_mgr.c b/src/slurmctld/partition_mgr.c index f0b32746bb3..2178f8d63fe 100644 --- a/src/slurmctld/partition_mgr.c +++ b/src/slurmctld/partition_mgr.c @@ -222,6 +222,7 @@ struct part_record *create_part_record(void) part_ptr->disable_root_jobs = default_part.disable_root_jobs; part_ptr->hidden = default_part.hidden; part_ptr->max_time = default_part.max_time; + part_ptr->default_time = default_part.default_time; part_ptr->max_nodes = default_part.max_nodes; part_ptr->max_nodes_orig = default_part.max_nodes; part_ptr->min_nodes = default_part.min_nodes; @@ -377,6 +378,7 @@ static void _dump_part_state(struct part_record 
*part_ptr, Buf buffer) packstr(part_ptr->name, buffer); pack32(part_ptr->max_time, buffer); + pack32(part_ptr->default_time, buffer); pack32(part_ptr->max_nodes_orig, buffer); pack32(part_ptr->min_nodes_orig, buffer); @@ -400,7 +402,7 @@ static void _dump_part_state(struct part_record *part_ptr, Buf buffer) int load_all_part_state(void) { char *part_name, *allow_groups, *nodes, *state_file, *data = NULL; - uint32_t max_time, max_nodes, min_nodes; + uint32_t max_time, default_time, max_nodes, min_nodes; time_t time; uint16_t def_part_flag, hidden, root_only; uint16_t max_share, priority, state_up; @@ -463,6 +465,7 @@ int load_all_part_state(void) while (remaining_buf(buffer) > 0) { safe_unpackstr_xmalloc(&part_name, &name_len, buffer); safe_unpack32(&max_time, buffer); + safe_unpack32(&default_time, buffer); safe_unpack32(&max_nodes, buffer); safe_unpack32(&min_nodes, buffer); @@ -502,6 +505,7 @@ int load_all_part_state(void) part_cnt++; part_ptr->hidden = hidden; part_ptr->max_time = max_time; + part_ptr->default_time = default_time; part_ptr->max_nodes = max_nodes; part_ptr->max_nodes_orig = max_nodes; part_ptr->min_nodes = min_nodes; @@ -573,6 +577,7 @@ int init_part_conf(void) default_part.disable_root_jobs = slurmctld_conf.disable_root_jobs; default_part.hidden = 0; default_part.max_time = INFINITE; + default_part.default_time = NO_VAL; default_part.max_nodes = INFINITE; default_part.max_nodes_orig = INFINITE; default_part.min_nodes = 1; @@ -768,6 +773,7 @@ void pack_part(struct part_record *part_ptr, Buf buffer) packstr(part_ptr->name, buffer); pack32(part_ptr->max_time, buffer); + pack32(part_ptr->default_time, buffer); pack32(part_ptr->max_nodes_orig, buffer); pack32(part_ptr->min_nodes_orig, buffer); altered = part_ptr->total_nodes; @@ -852,6 +858,16 @@ extern int update_part (update_part_msg_t * part_desc, bool create_flag) part_ptr->max_time = part_desc->max_time; } + if ((part_desc->default_time != NO_VAL) && + (part_desc->default_time > 
part_ptr->max_time)) { + info("update_part: DefaultTime would exceed MaxTime for " + "partition %s", part_desc->name); + } else if (part_desc->default_time != NO_VAL) { + info("update_part: setting default_time to %u for partition %s", + part_desc->default_time, part_desc->name); + part_ptr->default_time = part_desc->default_time; + } + if (part_desc->max_nodes != NO_VAL) { info("update_part: setting max_nodes to %u for partition %s", part_desc->max_nodes, part_desc->name); diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 8dcc535a18c..bc09b2257d7 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -635,9 +635,17 @@ static int _build_single_partitionline_info(slurm_conf_partition_t *part) if(part_ptr->disable_root_jobs) debug2("partition %s does not allow root jobs", part_ptr->name); - + + if ((part->default_time != NO_VAL) && + (part->default_time > part->max_time)) { + info("partition %s DefaultTime exceeds MaxTime (%u > %u)", + part->name, part->default_time, part->max_time); + part->default_time = NO_VAL; /* discard the invalid DefaultTime */ + } + part_ptr->hidden = part->hidden_flag ? 1 : 0; part_ptr->max_time = part->max_time; + part_ptr->default_time = part->default_time; part_ptr->max_share = part->max_share; part_ptr->max_nodes = part->max_nodes; part_ptr->max_nodes_orig = part->max_nodes; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index b3ad922931d..68a4296d473 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -272,6 +272,7 @@ struct part_record { char *name; /* name of the partition */ uint16_t hidden; /* 1 if hidden by default */ uint32_t max_time; /* minutes or INFINITE */ + uint32_t default_time; /* minutes, NO_VAL or INFINITE */ uint32_t max_nodes; /* per job or INFINITE */ uint32_t max_nodes_orig;/* unscaled value (c-nodes on BlueGene) */ uint32_t min_nodes; /* per job */ -- GitLab