From 2c99c42a4fc1a570627099bd987b21baefcf6306 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 14 May 2010 15:29:25 +0000 Subject: [PATCH] Ensure that we don't purge any jobs before testing after_ok, after_not_ok dependencies upon that job --- src/slurmctld/job_mgr.c | 30 +++++++++++++++++++++++++----- src/slurmctld/slurmctld.h | 9 +++++---- 2 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 8c2bdd88fbe..213a5949ce5 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -4860,19 +4860,39 @@ static void _pack_pending_job_details(struct job_details *detail_ptr, /* * purge_old_job - purge old job records. - * the jobs must have completed at least MIN_JOB_AGE minutes ago - * global: job_list - global job table - * last_job_update - time of last job table update - * NOTE: READ lock_slurmctld config before entry + * The jobs must have completed at least MIN_JOB_AGE minutes ago. + * Test job dependencies, handle after_ok, after_not_ok before + * purging any jobs.
+ * NOTE: READ lock slurmctld config and WRITE lock jobs before entry */ void purge_old_job(void) { + ListIterator job_iterator; + struct job_record *job_ptr; + time_t now = time(NULL); int i; + job_iterator = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (!IS_JOB_PENDING(job_ptr)) + continue; + if (test_job_dependency(job_ptr) == 2) { + info("Job dependency can't be satisfied, cancelling " + "job %u", job_ptr->job_id); + job_ptr->job_state = JOB_CANCELLED; + xfree(job_ptr->state_desc); + job_ptr->start_time = now; + job_ptr->end_time = now; + job_completion_logger(job_ptr); + last_job_update = now; + } + } + list_iterator_destroy(job_iterator); + i = list_delete_all(job_list, &_list_find_job_old, ""); if (i) { debug2("purge_old_job: purged %d old job records", i); -/* last_job_update = time(NULL); don't worry about state save */ +/* last_job_update = now; don't worry about state save */ } } diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index 4d7c90a8368..38853f1e270 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -1347,11 +1347,12 @@ extern void part_fini (void); /* * purge_old_job - purge old job records. - * the jobs must have completed at least MIN_JOB_AGE minutes ago - * global: job_list - global job table - * last_job_update - time of last job table update + * The jobs must have completed at least MIN_JOB_AGE minutes ago. + * Test job dependencies, handle after_ok, after_not_ok before + * purging any jobs. + * NOTE: READ lock slurmctld config and WRITE lock jobs before entry */ -extern void purge_old_job (void); +void purge_old_job(void); /* * rehash_jobs - Create or rebuild the job hash table. -- GitLab