From 26fa811ba4fd2cf7c795c50ad628a3d837feb43b Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 10 Jul 2009 18:03:29 +0000 Subject: [PATCH] svn merge -r18102:18114 https://eris.llnl.gov/svn/slurm/branches/slurm-2.0 --- NEWS | 1 + .../mysql/accounting_storage_mysql.c | 18 ++++++------ src/slurmctld/job_mgr.c | 28 +++++++++++++------ 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/NEWS b/NEWS index 8ff16356734..40ad95536ac 100644 --- a/NEWS +++ b/NEWS @@ -116,6 +116,7 @@ documents those changes that are of interest to users and admins. to properly handle user names that contain all digits. Return error code from uid_from_string() and gid_from_string() functions rather than a uid of -1, which might be a valid uid or gid on some systems. + -- Fix in re-calculation of job priorities due to DOWN or DRAINED nodes. * Changes in SLURM 2.0.3 ======================== diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 49b017d5122..4d3e55d72cd 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -3032,14 +3032,14 @@ static int _mysql_acct_check_tables(MYSQL *db_conn) { "user_usec", "int unsigned default 0 not null" }, { "sys_sec", "int unsigned default 0 not null" }, { "sys_usec", "int unsigned default 0 not null" }, - { "max_vsize", "int unsigned default 0 not null" }, + { "max_vsize", "bigint unsigned default 0 not null" }, { "max_vsize_task", "smallint unsigned default 0 not null" }, { "max_vsize_node", "int unsigned default 0 not null" }, - { "ave_vsize", "float default 0.0 not null" }, - { "max_rss", "int unsigned default 0 not null" }, + { "ave_vsize", "double default 0.0 not null" }, + { "max_rss", "bigint unsigned default 0 not null" }, { "max_rss_task", "smallint unsigned default 0 not null" }, { "max_rss_node", "int unsigned default 0 not 
null" }, - { "ave_rss", "float default 0.0 not null" }, + { "ave_rss", "double default 0.0 not null" }, { "max_pages", "int unsigned default 0 not null" }, { "max_pages_task", "smallint unsigned default 0 not null" }, { "max_pages_node", "int unsigned default 0 not null" }, @@ -10972,13 +10972,13 @@ extern int jobacct_storage_p_step_complete(mysql_conn_t *mysql_conn, "user_sec=%ld, user_usec=%ld, " "sys_sec=%ld, sys_usec=%ld, " "max_vsize=%u, max_vsize_task=%u, " - "max_vsize_node=%u, ave_vsize=%.2f, " + "max_vsize_node=%u, ave_vsize=%f, " "max_rss=%u, max_rss_task=%u, " - "max_rss_node=%u, ave_rss=%.2f, " + "max_rss_node=%u, ave_rss=%f, " "max_pages=%u, max_pages_task=%u, " - "max_pages_node=%u, ave_pages=%.2f, " - "min_cpu=%.2f, min_cpu_task=%u, " - "min_cpu_node=%u, ave_cpu=%.2f " + "max_pages_node=%u, ave_pages=%f, " + "min_cpu=%f, min_cpu_task=%u, " + "min_cpu_node=%u, ave_cpu=%f " "where id=%d and stepid=%u", step_table, (int)now, comp_status, diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index a35dee4ec66..986485edc7c 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -2592,8 +2592,8 @@ static int _job_create(job_desc_msg_t * job_desc, int allocate, int will_run, /* already confirmed submit_uid==0 */ /* If the priority isn't given we will figure it out later - after we see if the job is eligible or not. So we want - NO_VAL if not set. */ + * after we see if the job is eligible or not. So we want + * NO_VAL if not set. */ job_ptr->priority = job_desc->priority; if (update_job_dependency(job_ptr, job_desc->dependency)) { @@ -3541,7 +3541,18 @@ void job_time_limit(void) } } + /* This needs to be near the top of the loop, checks every + * running, suspended and pending job */ resv_status = job_resv_check(job_ptr); + + if ((job_ptr->priority == 1) && (!IS_JOB_FINISHED(job_ptr))) { + /* Rather than resetting job priorities whenever a + * DOWN, DRAINED or non-responsive node is returned to + * service, we pick them up here. 
There will be a small + * delay in resetting a job's priority, but the code is + * a lot cleaner this way. */ + _set_job_prio(job_ptr); + } if (!IS_JOB_RUNNING(job_ptr)) continue; @@ -5260,6 +5271,9 @@ int update_job(job_desc_msg_t * job_specs, uid_t uid) if (IS_JOB_PENDING(job_ptr) && detail_ptr) { detail_ptr->begin_time = job_specs->begin_time; update_accounting = true; + if ((job_ptr->priority == 1) && + (detail_ptr->begin_time <= now)) + _set_job_prio(job_ptr); } else error_code = ESLURM_DISABLED; } @@ -6905,9 +6919,9 @@ extern int update_job_account(char *module, struct job_record *job_ptr, !job_ptr->assoc_ptr && !(accounting_enforce & ACCOUNTING_ENFORCE_ASSOCS)) { /* if not enforcing associations we want to look for - the default account and use it to avoid getting - trash in the accounting records. - */ + * the default account and use it to avoid getting + * trash in the accounting records. + */ assoc_rec.acct = NULL; assoc_mgr_fill_in_assoc(acct_db_conn, &assoc_rec, accounting_enforce, @@ -7308,10 +7322,6 @@ _copy_job_record_to_job_desc(struct job_record *job_ptr) /* construct a job_desc_msg_t from job */ job_desc = xmalloc(sizeof(job_desc_msg_t)); - if (!job_desc) { - error("_pack_job_for_ckpt: memory exhausted"); - return NULL; - } job_desc->account = xstrdup(job_ptr->account); job_desc->acctg_freq = details->acctg_freq; -- GitLab