From 106263f2f9e4d4c0abb3e5cd757fa7ef200605fd Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Fri, 18 Jun 2010 17:48:35 +0000 Subject: [PATCH] add node_bitmap_cg to report nodes which are still completing a job --- src/slurmctld/job_mgr.c | 70 +++++++++++++++++++++++++--------- src/slurmctld/node_mgr.c | 2 +- src/slurmctld/node_scheduler.c | 4 +- src/slurmctld/read_config.c | 1 + src/slurmctld/slurmctld.h | 4 ++ 5 files changed, 61 insertions(+), 20 deletions(-) diff --git a/src/slurmctld/job_mgr.c b/src/slurmctld/job_mgr.c index 2672d4e8658..300892958e9 100644 --- a/src/slurmctld/job_mgr.c +++ b/src/slurmctld/job_mgr.c @@ -1671,6 +1671,7 @@ extern int kill_job_by_part_name(char *part_name) info("Killing job_id %u on defunct partition %s", job_ptr->job_id, part_name); job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; + build_cg_bitmap(job_ptr); job_ptr->exit_code = MAX(job_ptr->exit_code, 1); job_ptr->state_reason = FAIL_DOWN_PARTITION; xfree(job_ptr->state_desc); @@ -1740,7 +1741,7 @@ extern int kill_running_job_by_node_name(char *node_name) if (IS_JOB_COMPLETING(job_ptr)) { job_count++; - bit_clear(job_ptr->node_bitmap, bit_position); + bit_clear(job_ptr->node_bitmap_cg, bit_position); job_update_cpu_cnt(job_ptr, bit_position); if (job_ptr->node_cnt) (job_ptr->node_cnt)--; @@ -1805,8 +1806,10 @@ extern int kill_running_job_by_node_name(char *node_name) job_completion_logger(job_ptr, true); job_ptr->db_index = 0; job_ptr->job_state = JOB_PENDING; - if (job_ptr->node_cnt) + if (job_ptr->node_cnt) { job_ptr->job_state |= JOB_COMPLETING; + build_cg_bitmap(job_ptr); + } job_ptr->details->submit_time = now; /* restart from periodic checkpoint */ @@ -1832,6 +1835,7 @@ extern int kill_running_job_by_node_name(char *node_name) srun_node_fail(job_ptr->job_id, node_name); job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; + build_cg_bitmap(job_ptr); job_ptr->exit_code = MAX(job_ptr->exit_code, 1); job_ptr->state_reason = FAIL_DOWN_NODE; xfree(job_ptr->state_desc); @@ -2325,6 +2329,7 @@ extern int job_fail(uint32_t job_id) job_ptr->end_time = now; last_job_update = now; job_ptr->job_state = JOB_FAILED | JOB_COMPLETING; + build_cg_bitmap(job_ptr); job_ptr->exit_code = 1; job_ptr->state_reason = FAIL_LAUNCH; xfree(job_ptr->state_desc); @@ -2398,6 +2403,7 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, if (IS_JOB_PENDING(job_ptr) && IS_JOB_COMPLETING(job_ptr) && (signal == SIGKILL)) { job_ptr->job_state = JOB_CANCELLED | JOB_COMPLETING; + /* build_cg_bitmap() not needed, job already completing */ verbose("job_signal of requeuing job %u successful", job_id); return SLURM_SUCCESS; } @@ -2418,6 +2424,7 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, job_ptr->end_time = job_ptr->suspend_time; job_ptr->tot_sus_time += difftime(now, job_ptr->suspend_time); job_ptr->job_state = JOB_CANCELLED | JOB_COMPLETING; + build_cg_bitmap(job_ptr); jobacct_storage_g_job_suspend(acct_db_conn, job_ptr); deallocate_nodes(job_ptr, false, true); job_completion_logger(job_ptr, false); @@ -2433,6 +2440,7 @@ extern int job_signal(uint32_t job_id, uint16_t signal, uint16_t batch_flag, job_ptr->end_time = now; last_job_update = now; job_ptr->job_state = JOB_CANCELLED | JOB_COMPLETING; + build_cg_bitmap(job_ptr); deallocate_nodes(job_ptr, false, false); job_completion_logger(job_ptr, false); } else if (batch_flag) { @@ -2521,8 +2529,10 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, if (IS_JOB_COMPLETING(job_ptr)) return SLURM_SUCCESS; /* avoid replay */ - if (IS_JOB_RUNNING(job_ptr)) + if (IS_JOB_RUNNING(job_ptr)) { job_comp_flag = JOB_COMPLETING; + build_cg_bitmap(job_ptr); + } if (IS_JOB_SUSPENDED(job_ptr)) { enum job_states suspend_job_state = job_ptr->job_state; /* we can't have it as suspended when we call the @@ -2532,6 +2542,7 @@ extern int job_complete(uint32_t job_id, uid_t uid, bool requeue, jobacct_storage_g_job_suspend(acct_db_conn, job_ptr); job_ptr->job_state = suspend_job_state; job_comp_flag = JOB_COMPLETING; + build_cg_bitmap(job_ptr); suspended = true; } @@ -4165,6 +4176,7 @@ static void _job_timed_out(struct job_record *job_ptr) job_ptr->end_time = now; job_ptr->time_last_active = now; job_ptr->job_state = JOB_TIMEOUT | JOB_COMPLETING; + build_cg_bitmap(job_ptr); job_ptr->exit_code = MAX(job_ptr->exit_code, 1); deallocate_nodes(job_ptr, true, false); job_completion_logger(job_ptr, false); @@ -4307,6 +4319,7 @@ static void _list_delete_job(void *job_entry) xfree(job_ptr->network); xfree(job_ptr->node_addr); FREE_NULL_BITMAP(job_ptr->node_bitmap); + FREE_NULL_BITMAP(job_ptr->node_bitmap_cg); xfree(job_ptr->nodes); xfree(job_ptr->nodes_completing); xfree(job_ptr->partition); @@ -4559,10 +4572,10 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, /* Only send the allocated nodelist since we are only sending * the number of cpus and nodes that are currently allocated. */ - if(!IS_JOB_COMPLETING(dump_job_ptr)) + if (!IS_JOB_COMPLETING(dump_job_ptr)) packstr(dump_job_ptr->nodes, buffer); else { - nodelist = bitmap2node_name(dump_job_ptr->node_bitmap); + nodelist = bitmap2node_name(dump_job_ptr->node_bitmap_cg); packstr(nodelist, buffer); xfree(nodelist); } @@ -4598,7 +4611,10 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, packstr(dump_job_ptr->name, buffer); packstr(dump_job_ptr->wckey, buffer); packstr(dump_job_ptr->alloc_node, buffer); - pack_bit_fmt(dump_job_ptr->node_bitmap, buffer); + if (!IS_JOB_COMPLETING(dump_job_ptr)) + pack_bit_fmt(dump_job_ptr->node_bitmap, buffer); + else + pack_bit_fmt(dump_job_ptr->node_bitmap_cg, buffer); select_g_select_jobinfo_pack(dump_job_ptr->select_jobinfo, buffer, protocol_version); @@ -4658,10 +4674,10 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, /* Only send the allocated nodelist since we are only sending * the number of cpus and nodes that are currently allocated. */ - if(!IS_JOB_COMPLETING(dump_job_ptr)) + if (!IS_JOB_COMPLETING(dump_job_ptr)) packstr(dump_job_ptr->nodes, buffer); else { - nodelist = bitmap2node_name(dump_job_ptr->node_bitmap); + nodelist = bitmap2node_name(dump_job_ptr->node_bitmap_cg); packstr(nodelist, buffer); xfree(nodelist); } @@ -4697,7 +4713,10 @@ void pack_job(struct job_record *dump_job_ptr, uint16_t show_flags, Buf buffer, packstr(dump_job_ptr->name, buffer); packstr(dump_job_ptr->wckey, buffer); packstr(dump_job_ptr->alloc_node, buffer); - pack_bit_fmt(dump_job_ptr->node_bitmap, buffer); + if (!IS_JOB_COMPLETING(dump_job_ptr)) + pack_bit_fmt(dump_job_ptr->node_bitmap, buffer); + else + pack_bit_fmt(dump_job_ptr->node_bitmap_cg, buffer); detail_ptr = dump_job_ptr->details; if (IS_JOB_COMPLETING(dump_job_ptr) && dump_job_ptr->cpu_cnt) @@ -4970,17 +4989,19 @@ void reset_job_bitmaps(void) } job_ptr->part_ptr = part_ptr; - FREE_NULL_BITMAP(job_ptr->node_bitmap); - if ((job_ptr->nodes_completing) && - (node_name2bitmap(job_ptr->nodes_completing, - false, &job_ptr->node_bitmap))) { + FREE_NULL_BITMAP(job_ptr->node_bitmap_cg); + if (job_ptr->nodes_completing && + node_name2bitmap(job_ptr->nodes_completing, + false, &job_ptr->node_bitmap_cg)) { error("Invalid nodes (%s) for job_id %u", job_ptr->nodes_completing, job_ptr->job_id); job_fail = true; - } else if ((job_ptr->node_bitmap == NULL) && job_ptr->nodes && - (node_name2bitmap(job_ptr->nodes, false, - &job_ptr->node_bitmap))) { + } + FREE_NULL_BITMAP(job_ptr->node_bitmap); + if (job_ptr->nodes && + node_name2bitmap(job_ptr->nodes, false, + &job_ptr->node_bitmap) && !job_fail) { error("Invalid nodes (%s) for job_id %u", job_ptr->nodes, job_ptr->job_id); job_fail = true; @@ -5012,10 +5033,12 @@ void reset_job_bitmaps(void) job_ptr->end_time = time(NULL); job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; + build_cg_bitmap(job_ptr); } else if (IS_JOB_SUSPENDED(job_ptr)) { job_ptr->end_time = job_ptr->suspend_time; job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; + build_cg_bitmap(job_ptr); job_ptr->tot_sus_time += difftime(now, job_ptr->suspend_time); jobacct_storage_g_job_suspend(acct_db_conn, @@ -7587,8 +7610,10 @@ extern int job_requeue (uid_t uid, uint32_t job_id, slurm_fd conn_fd, job_completion_logger(job_ptr, true); job_ptr->db_index = 0; job_ptr->job_state = JOB_PENDING; - if (job_ptr->node_cnt) + if (job_ptr->node_cnt) { job_ptr->job_state |= JOB_COMPLETING; + build_cg_bitmap(job_ptr); + } job_ptr->details->submit_time = now; job_ptr->pre_sus_time = (time_t) 0; @@ -8953,3 +8978,14 @@ _read_job_ckpt_file(char *ckpt_file, int *size_ptr) *size_ptr = data_size; return data; } + +/* Build a bitmap of nodes completing this job */ +extern void build_cg_bitmap(struct job_record *job_ptr) +{ + FREE_NULL_BITMAP(job_ptr->node_bitmap_cg); + if (job_ptr->node_bitmap) { + job_ptr->node_bitmap_cg = bit_copy(job_ptr->node_bitmap); + if (job_ptr->node_bitmap_cg == NULL) + fatal("bit_copy: memory allocation failure"); + } +} diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index e81b615e973..f6ab07169b6 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -2409,7 +2409,7 @@ void make_node_idle(struct node_record *node_ptr, if (job_ptr && /* Specific job completed */ (bit_test(job_ptr->node_bitmap, inx))) { /* Not a replay */ last_job_update = now; - bit_clear(job_ptr->node_bitmap, inx); + bit_clear(job_ptr->node_bitmap_cg, inx); job_update_cpu_cnt(job_ptr, inx); diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index fd801dda060..ed8abba6512 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -202,7 +202,7 @@ extern void deallocate_nodes(struct job_record *job_ptr, bool timeout, if (IS_NODE_DOWN(node_ptr)) { /* Issue the KILL RPC, but don't verify response */ down_node_cnt++; - bit_clear(job_ptr->node_bitmap, i); + bit_clear(job_ptr->node_bitmap_cg, i); job_update_cpu_cnt(job_ptr, i); job_ptr->node_cnt--; } @@ -1897,7 +1897,7 @@ extern void re_kill_job(struct job_record *job_ptr) continue; if (IS_NODE_DOWN(node_ptr)) { /* Consider job already completed */ - bit_clear(job_ptr->node_bitmap, i); + bit_clear(job_ptr->node_bitmap_cg, i); job_update_cpu_cnt(job_ptr, i); if (node_ptr->comp_job_cnt) (node_ptr->comp_job_cnt)--; diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 875f2c19e1e..07b6f5b78f2 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -1366,6 +1366,7 @@ static int _sync_nodes_to_active_job(struct job_record *job_ptr) info("Killing job %u on DOWN node %s", job_ptr->job_id, node_ptr->name); job_ptr->job_state = JOB_NODE_FAIL | JOB_COMPLETING; + build_cg_bitmap(job_ptr); job_ptr->end_time = MIN(job_ptr->end_time, now); job_ptr->exit_code = MAX(job_ptr->exit_code, 1); job_ptr->state_reason = FAIL_DOWN_NODE; diff --git a/src/slurmctld/slurmctld.h b/src/slurmctld/slurmctld.h index e77b43642c9..e534c145583 100644 --- a/src/slurmctld/slurmctld.h +++ b/src/slurmctld/slurmctld.h @@ -435,6 +435,7 @@ struct job_record { slurm_addr *node_addr; /* addresses of the nodes allocated to * job */ bitstr_t *node_bitmap; /* bitmap of nodes allocated to job */ + bitstr_t *node_bitmap_cg; /* bitmap of nodes completing job */ uint32_t node_cnt; /* count of nodes currently * allocated to job */ char *nodes_completing; /* nodes still in completing state @@ -597,6 +598,9 @@ enum select_plugindata_info { extern void abort_job_on_node(uint32_t job_id, struct job_record *job_ptr, struct node_record *node_ptr); +/* Build a bitmap of nodes completing this job */ +extern void build_cg_bitmap(struct job_record *job_ptr); + /* Given a config_record with it's bitmap already set, update feature_list */ extern void build_config_feature_list(struct config_record *config_ptr); -- GitLab