diff --git a/NEWS b/NEWS index 5d6e1ca46aa42c1d2c05bc9d4401916d43270618..97f7ce17b1de75dbd014e6823fbba99dfbf7a5cf 100644 --- a/NEWS +++ b/NEWS @@ -95,6 +95,9 @@ documents those changes that are of interest to users and administrators. ========================== -- Docs - the correct default value for GroupUpdateForce is 0. -- mpi/pmix - improve point to point communication performance. + -- SlurmDB - include pending jobs in search during 'sacctmgr show runawayjobs'. + -- Add client side out-of-range checks to --nice flag. + -- Fix support for sbatch "-W" option, previously needed to use "--wait". * Changes in Slurm 16.05.5 ========================== diff --git a/src/plugins/accounting_storage/mysql/as_mysql_fix_runaway_jobs.c b/src/plugins/accounting_storage/mysql/as_mysql_fix_runaway_jobs.c index 6e06aed3a319f404268e5bcb1424153e90c15874..89ca7e484a446a5716a94b9f105813e8cccc5ba4 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_fix_runaway_jobs.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_fix_runaway_jobs.c @@ -120,7 +120,8 @@ extern int as_mysql_fix_runaway_jobs(mysql_conn_t *mysql_conn, uint32_t uid, xstrfmtcat(job_ids, "%s%d", ((job_ids) ? 
"," : ""), job->jobid); } - query = xstrdup_printf("UPDATE \"%s_%s\" SET time_end=time_start, " + query = xstrdup_printf("UPDATE \"%s_%s\" SET time_end=" + "GREATEST(time_start, time_eligible, time_submit), " "state=%d WHERE id_job IN (%s);", mysql_conn->cluster_name, job_table, JOB_COMPLETE, job_ids); diff --git a/src/sacctmgr/runaway_job_functions.c b/src/sacctmgr/runaway_job_functions.c index f1dbff24e47b33ceeb050883475c74503e54add2..43916592de0885ea87a4ea6fc54a84897e1d6613 100644 --- a/src/sacctmgr/runaway_job_functions.c +++ b/src/sacctmgr/runaway_job_functions.c @@ -200,6 +200,7 @@ static List _get_runaway_jobs(slurmdb_job_cond_t *job_cond) job_cond->without_steps = 1; job_cond->without_usage_truncation = 1; job_cond->state_list = list_create(slurm_destroy_char); + slurm_addto_char_list(job_cond->state_list, "0"); slurm_addto_char_list(job_cond->state_list, "1"); if (!job_cond->cluster_list || !list_count(job_cond->cluster_list)) { @@ -290,9 +291,11 @@ extern int sacctmgr_list_runaway_jobs(int argc, char *argv[]) List format_list = list_create(slurm_destroy_char); slurmdb_job_cond_t *job_cond = xmalloc(sizeof(slurmdb_job_cond_t)); char *ask_msg = "\nWould you like to fix these runaway jobs?\n" - "(This will set the end times to start times and " - "states to completed for these jobs and will trigger " - "the rollup to reroll usage from before the oldest " + "(This will set the end time for each job to the " + "latest out of the start, eligible, or submit times, " + "and set the state to completed.\n" + "Once corrected, this will trigger the rollup to " + "reroll usage from before the oldest " "runaway job.)\n\n"; diff --git a/src/salloc/opt.c b/src/salloc/opt.c index 9862d9c7f7a776ca0f15cd5f006aaf3ff3363ecb..c57164812bf72718771dad8ed7ab43036289dcd8 100644 --- a/src/salloc/opt.c +++ b/src/salloc/opt.c @@ -1101,21 +1101,29 @@ void set_options(const int argc, char **argv) opt.mcs_label = xstrdup(optarg); break; } - case LONG_OPT_NICE: + case LONG_OPT_NICE: { + 
long long tmp_nice; if (optarg) - opt.nice = strtol(optarg, NULL, 10); + tmp_nice = strtoll(optarg, NULL, 10); else - opt.nice = 100; - if (opt.nice < 0) { + tmp_nice = 100; + if (abs(tmp_nice) > (NICE_OFFSET - 3)) { + error("Nice value out of range (+/- %u). Value " + "ignored", NICE_OFFSET - 3); + tmp_nice = 0; + } + if (tmp_nice < 0) { uid_t my_uid = getuid(); if ((my_uid != 0) && (my_uid != slurm_get_slurm_user_id())) { error("Nice value must be " "non-negative, value ignored"); - opt.nice = 0; + tmp_nice = 0; } } + opt.nice = (int) tmp_nice; break; + } case LONG_OPT_PRIORITY: if (!optarg) { /* CLANG Fix */ ; diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index fb7a2653dc27433f8e07519bc2da5d2bc0be4cfc..f1eab0778331c52901ca591b153999a0cb595263 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -124,7 +124,6 @@ enum wrappers { #define LONG_OPT_PROPAGATE 0x100 #define LONG_OPT_MEM_BIND 0x102 #define LONG_OPT_POWER 0x103 -#define LONG_OPT_WAIT 0x104 #define LONG_OPT_JOBID 0x105 #define LONG_OPT_TMP 0x106 #define LONG_OPT_MEM 0x107 @@ -807,6 +806,7 @@ static struct option long_options[] = { {"verbose", no_argument, 0, 'v'}, {"version", no_argument, 0, 'V'}, {"nodelist", required_argument, 0, 'w'}, + {"wait", no_argument, 0, 'W'}, {"exclude", required_argument, 0, 'x'}, {"acctg-freq", required_argument, 0, LONG_OPT_ACCTG_FREQ}, {"bbf", required_argument, 0, LONG_OPT_BURST_BUFFER_FILE}, @@ -874,7 +874,6 @@ static struct option long_options[] = { {"tmp", required_argument, 0, LONG_OPT_TMP}, {"uid", required_argument, 0, LONG_OPT_UID}, {"use-min-nodes", no_argument, 0, LONG_OPT_USE_MIN_NODES}, - {"wait", no_argument, 0, LONG_OPT_WAIT}, {"wait-all-nodes",required_argument, 0, LONG_OPT_WAIT_ALL_NODES}, {"wckey", required_argument, 0, LONG_OPT_WCKEY}, {"wrap", required_argument, 0, LONG_OPT_WRAP}, @@ -1509,6 +1508,9 @@ static void _set_options(int argc, char **argv) xfree(opt.nodelist); opt.nodelist = xstrdup(optarg); break; + case 'W': + opt.wait = true; + break; 
case 'x': xfree(opt.exc_nodes); opt.exc_nodes = xstrdup(optarg); @@ -1669,21 +1671,29 @@ static void _set_options(int argc, char **argv) xfree(opt.burst_buffer_file); opt.burst_buffer_file = _read_file(optarg); break; - case LONG_OPT_NICE: + case LONG_OPT_NICE: { + long long tmp_nice; if (optarg) - opt.nice = strtol(optarg, NULL, 10); + tmp_nice = strtoll(optarg, NULL, 10); else - opt.nice = 100; - if (opt.nice < 0) { + tmp_nice = 100; + if (abs(tmp_nice) > (NICE_OFFSET - 3)) { + error("Nice value out of range (+/- %u). Value " + "ignored", NICE_OFFSET - 3); + tmp_nice = 0; + } + if (tmp_nice < 0) { uid_t my_uid = getuid(); if ((my_uid != 0) && (my_uid != slurm_get_slurm_user_id())) { error("Nice value must be " "non-negative, value ignored"); - opt.nice = 0; + tmp_nice = 0; } } + opt.nice = (int) tmp_nice; break; + } case LONG_OPT_PRIORITY: if (!optarg) { /* CLANG Fix */ ; @@ -1839,9 +1849,6 @@ static void _set_options(int argc, char **argv) xfree(opt.network); opt.network = xstrdup(optarg); break; - case LONG_OPT_WAIT: - opt.wait = true; - break; case LONG_OPT_WCKEY: xfree(opt.wckey); opt.wckey = xstrdup(optarg); @@ -2202,21 +2209,29 @@ static void _set_pbs_options(int argc, char **argv) else opt.ofname = xstrdup(optarg); break; - case 'p': + case 'p': { + long long tmp_nice; if (optarg) - opt.nice = strtol(optarg, NULL, 10); + tmp_nice = strtoll(optarg, NULL, 10); else - opt.nice = 100; - if (opt.nice < 0) { + tmp_nice = 100; + if (abs(tmp_nice) > (NICE_OFFSET - 3)) { + error("Nice value out of range (+/- %u). 
Value " + "ignored", NICE_OFFSET - 3); + tmp_nice = 0; + } + if (tmp_nice < 0) { uid_t my_uid = getuid(); if ((my_uid != 0) && (my_uid != slurm_get_slurm_user_id())) { error("Nice value must be " "non-negative, value ignored"); - opt.nice = 0; + tmp_nice = 0; } } + opt.nice = (int) tmp_nice; break; + } case 'q': xfree(opt.partition); opt.partition = xstrdup(optarg); @@ -2515,21 +2530,28 @@ static void _parse_pbs_resource_list(char *rl) xfree(temp); } } else if (!xstrncmp(rl+i, "nice=", 5)) { + long long tmp_nice; i += 5; temp = _get_pbs_option_value(rl, &i, ','); if (temp) - opt.nice = strtol(temp, NULL, 10); + tmp_nice = strtoll(temp, NULL, 10); else - opt.nice = 100; - if (opt.nice < 0) { + tmp_nice = 100; + if (abs(tmp_nice) > (NICE_OFFSET - 3)) { + error("Nice value out of range (+/- %u). Value " + "ignored", NICE_OFFSET - 3); + tmp_nice = 0; + } + if (tmp_nice < 0) { uid_t my_uid = getuid(); if ((my_uid != 0) && (my_uid != slurm_get_slurm_user_id())) { error("Nice value must be " "non-negative, value ignored"); - opt.nice = 0; + tmp_nice = 0; } } + opt.nice = (int) tmp_nice; xfree(temp); } else if (!xstrncmp(rl+i, "nodes=", 6)) { i+=6; diff --git a/src/scontrol/update_job.c b/src/scontrol/update_job.c index 7724ba543c62fbaba529a385b2546fe18ae9529a..b433afec1b5e30e9ecef19029856c539e4405751 100644 --- a/src/scontrol/update_job.c +++ b/src/scontrol/update_job.c @@ -856,16 +856,15 @@ scontrol_update_job (int argc, char *argv[]) update_cnt++; } else if (strncasecmp(tag, "Nice", MAX(taglen, 2)) == 0) { - int nice; - nice = strtoll(val, (char **) NULL, 10); - if (abs(nice) > NICE_OFFSET) { - error("Invalid nice value, must be between " - "-%d and %d", NICE_OFFSET, - NICE_OFFSET); + long long tmp_nice; + tmp_nice = strtoll(val, (char **)NULL, 10); + if (abs(tmp_nice) > (NICE_OFFSET - 3)) { + error("Nice value out of range (+/- %u). 
Value " + "ignored", NICE_OFFSET - 3); exit_code = 1; return 0; } - job_msg.nice = NICE_OFFSET + nice; + job_msg.nice = NICE_OFFSET + tmp_nice; update_cnt++; } else if (strncasecmp(tag, "CPUsPerTask", MAX(taglen, 6)) == 0) { diff --git a/src/srun/libsrun/opt.c b/src/srun/libsrun/opt.c index 0b4370bf60e31a59734496aefffa9f6aaf922c0d..684644fe8798174060ab55b758c16d8014647aa9 100644 --- a/src/srun/libsrun/opt.c +++ b/src/srun/libsrun/opt.c @@ -1551,21 +1551,29 @@ static void _set_options(const int argc, char **argv) xfree(opt.task_epilog); opt.task_epilog = xstrdup(optarg); break; - case LONG_OPT_NICE: + case LONG_OPT_NICE: { + long long tmp_nice; if (optarg) - opt.nice = strtol(optarg, NULL, 10); + tmp_nice = strtoll(optarg, NULL, 10); else - opt.nice = 100; - if (opt.nice < 0) { + tmp_nice = 100; + if (abs(tmp_nice) > (NICE_OFFSET - 3)) { + error("Nice value out of range (+/- %u). Value " + "ignored", NICE_OFFSET - 3); + tmp_nice = 0; + } + if (tmp_nice < 0) { uid_t my_uid = getuid(); if ((my_uid != 0) && (my_uid != slurm_get_slurm_user_id())) { - error("Nice value must be non-negative, " - "value ignored"); - opt.nice = 0; + error("Nice value must be " + "non-negative, value ignored"); + tmp_nice = 0; } } + opt.nice = (int) tmp_nice; break; + } case LONG_OPT_PRIORITY: { long long priority; if (strcasecmp(optarg, "TOP") == 0) {