From 995aa09ff1f76e513c71707feb0606348e6ab3a0 Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Tue, 18 May 2010 22:24:09 +0000 Subject: [PATCH] Add logic to save/restore job gres details (the allocated bitmaps) and use them to sync node state info on slurmctld restart. --- src/common/gres.c | 2 +- src/common/node_conf.c | 2 +- src/plugins/gres/gpu/gres_gpu.c | 26 +++++++++-- src/plugins/gres/nic/gres_nic.c | 26 +++++++++-- src/slurmctld/node_mgr.c | 2 + src/slurmctld/read_config.c | 78 ++++++++++++++++++++++++++++++--- 6 files changed, 121 insertions(+), 15 deletions(-) diff --git a/src/common/gres.c b/src/common/gres.c index b7be1450141..d5933fdff32 100644 --- a/src/common/gres.c +++ b/src/common/gres.c @@ -1173,7 +1173,7 @@ extern int gres_plugin_pack_job_state(List gres_list, Buf buffer, if (rc2 != SLURM_SUCCESS) { rc = rc2; set_buf_offset(buffer, header_offset); - break; + continue; } tail_offset = get_buf_offset(buffer); set_buf_offset(buffer, size_offset); diff --git a/src/common/node_conf.c b/src/common/node_conf.c index 4b30ad8d93a..42a00f14466 100644 --- a/src/common/node_conf.c +++ b/src/common/node_conf.c @@ -462,7 +462,7 @@ static int _list_find_feature (void *feature_entry, void *key) } /* - * _build_all_nodeline_info - get a array of slurm_conf_node_t structures + * build_all_nodeline_info - get a array of slurm_conf_node_t structures * from the slurm.conf reader, build table, and set values * IN set_bitmap - if true, set node_bitmap in config record (used by slurmd) * RET 0 if no error, error code otherwise diff --git a/src/plugins/gres/gpu/gres_gpu.c b/src/plugins/gres/gpu/gres_gpu.c index 9caa5f8f9b7..de61ea95208 100644 --- a/src/plugins/gres/gpu/gres_gpu.c +++ b/src/plugins/gres/gpu/gres_gpu.c @@ -645,16 +645,22 @@ extern int job_gres_validate(char *config, void **gres_data) extern int pack_job_state(void *gres_data, Buf buffer) { + int i; gpu_job_state_t *gres_ptr = (gpu_job_state_t *) gres_data; - pack32(gres_ptr->gpu_cnt_alloc, buffer); + pack32(gres_ptr->gpu_cnt_alloc, buffer); pack8 (gres_ptr->gpu_cnt_mult, buffer); + pack32(gres_ptr->node_cnt, buffer); + for (i=0; i<gres_ptr->node_cnt; i++) + pack_bit_str(gres_ptr->gpu_bit_alloc[i], buffer); + return SLURM_SUCCESS; } extern int unpack_job_state(void **gres_data, Buf buffer) { + int i; gpu_job_state_t *gres_ptr; gres_ptr = xmalloc(sizeof(gpu_job_state_t)); @@ -662,12 +668,26 @@ extern int unpack_job_state(void **gres_data, Buf buffer) if (buffer) { safe_unpack32(&gres_ptr->gpu_cnt_alloc, buffer); safe_unpack8 (&gres_ptr->gpu_cnt_mult, buffer); + + safe_unpack32(&gres_ptr->node_cnt, buffer); + gres_ptr->gpu_bit_alloc = xmalloc(sizeof(bitstr_t *) * + (gres_ptr->node_cnt + 1)); + for (i=0; i<gres_ptr->node_cnt; i++) + unpack_bit_str(&gres_ptr->gpu_bit_alloc[i], buffer); } *gres_data = gres_ptr; return SLURM_SUCCESS; unpack_error: + error("Unpacking %s job state info", plugin_name); + if (gres_ptr->gpu_bit_alloc) { + for (i=0; i<gres_ptr->node_cnt; i++) { + if (gres_ptr->gpu_bit_alloc[i]) + bit_free(gres_ptr->gpu_bit_alloc[i]); + } + xfree(gres_ptr->gpu_bit_alloc); + } xfree(gres_ptr); *gres_data = NULL; return SLURM_ERROR; @@ -719,8 +739,8 @@ extern int job_alloc(void *job_gres_data, void *node_gres_data, plugin_name); xfree(job_gres_ptr->gpu_bit_alloc); } - job_gres_ptr->gpu_bit_alloc = - xmalloc(sizeof(bitstr_t *) * node_cnt); + job_gres_ptr->gpu_bit_alloc = xmalloc(sizeof(bitstr_t *) * + (node_cnt + 1)); } else if (job_gres_ptr->node_cnt < node_cnt) { error("%s: node_cnt increase from %u to %d", plugin_name, job_gres_ptr->node_cnt, node_cnt); diff --git a/src/plugins/gres/nic/gres_nic.c b/src/plugins/gres/nic/gres_nic.c index 631f3a67643..e15fc7c47f8 100644 --- a/src/plugins/gres/nic/gres_nic.c +++ b/src/plugins/gres/nic/gres_nic.c @@ -645,16 +645,22 @@ extern int job_gres_validate(char *config, void **gres_data) extern int pack_job_state(void *gres_data, Buf buffer) { + int i; nic_job_state_t *gres_ptr = (nic_job_state_t *) gres_data; - pack32(gres_ptr->nic_cnt_alloc, buffer); + pack32(gres_ptr->nic_cnt_alloc, buffer); pack8 (gres_ptr->nic_cnt_mult, buffer); + pack32(gres_ptr->node_cnt, buffer); + for (i=0; i<gres_ptr->node_cnt; i++) + pack_bit_str(gres_ptr->nic_bit_alloc[i], buffer); + return SLURM_SUCCESS; } extern int unpack_job_state(void **gres_data, Buf buffer) { + int i; nic_job_state_t *gres_ptr; gres_ptr = xmalloc(sizeof(nic_job_state_t)); @@ -662,12 +668,26 @@ extern int unpack_job_state(void **gres_data, Buf buffer) if (buffer) { safe_unpack32(&gres_ptr->nic_cnt_alloc, buffer); safe_unpack8 (&gres_ptr->nic_cnt_mult, buffer); + + safe_unpack32(&gres_ptr->node_cnt, buffer); + gres_ptr->nic_bit_alloc = xmalloc(sizeof(bitstr_t *) * + (gres_ptr->node_cnt + 1)); + for (i=0; i<gres_ptr->node_cnt; i++) + unpack_bit_str(&gres_ptr->nic_bit_alloc[i], buffer); } *gres_data = gres_ptr; return SLURM_SUCCESS; unpack_error: + error("Unpacking %s job state info", plugin_name); + if (gres_ptr->nic_bit_alloc) { + for (i=0; i<gres_ptr->node_cnt; i++) { + if (gres_ptr->nic_bit_alloc[i]) + bit_free(gres_ptr->nic_bit_alloc[i]); + } + xfree(gres_ptr->nic_bit_alloc); + } xfree(gres_ptr); *gres_data = NULL; return SLURM_ERROR; @@ -719,8 +739,8 @@ extern int job_alloc(void *job_gres_data, void *node_gres_data, plugin_name); xfree(job_gres_ptr->nic_bit_alloc); } - job_gres_ptr->nic_bit_alloc = - xmalloc(sizeof(bitstr_t *) * node_cnt); + job_gres_ptr->nic_bit_alloc = xmalloc(sizeof(bitstr_t *) * + (node_cnt + 1)); } else if (job_gres_ptr->node_cnt < node_cnt) { error("%s: node_cnt increase from %u to %d", plugin_name, job_gres_ptr->node_cnt, node_cnt); diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index c99b21bbd3d..8753bf89bfc 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -435,6 +435,8 @@ extern int load_all_node_state ( bool state_only ) node_ptr->reason_time = reason_time; node_ptr->reason_uid = reason_uid; } + node_ptr->gres_list = gres_list; + gres_list = NULL; /* Nothing to free */ } else { node_cnt++; if ((!power_save_mode) && diff --git a/src/slurmctld/read_config.c b/src/slurmctld/read_config.c index 3201d5faab0..16a01dbc88f 100644 --- a/src/slurmctld/read_config.c +++ b/src/slurmctld/read_config.c @@ -92,6 +92,7 @@ static void _acct_restore_active_jobs(void); static int _build_bitmaps(void); static void _build_bitmaps_pre_select(void); +static void _gres_reconig(bool reconfig); static int _init_all_slurm_conf(void); static int _preserve_select_type_param(slurm_ctl_conf_t * ctl_conf_ptr, uint16_t old_select_type_p); @@ -627,7 +628,6 @@ int read_slurm_conf(int recover, bool reconfig) char *state_save_dir = xstrdup(slurmctld_conf.state_save_location); char *mpi_params; uint16_t old_select_type_p = slurmctld_conf.select_type_param; - bool gres_changed = false; /* initialization */ START_TIMER; @@ -670,7 +670,7 @@ int read_slurm_conf(int recover, bool reconfig) if (slurm_topo_init() != SLURM_SUCCESS) fatal("Failed to initialize topology plugin"); - /* Build node and partittion information based upon slurm.conf file */ + /* Build node and partition information based upon slurm.conf file */ _build_all_nodeline_info(); _handle_all_downnodes(); _build_all_partitionline_info(); @@ -809,8 +809,7 @@ int read_slurm_conf(int recover, bool reconfig) #endif /* Sync select plugin with synchronized job/node/part data */ - if (reconfig) - gres_plugin_reconfig(&gres_changed); + _gres_reconig(reconfig); select_g_reconfigure(); slurmctld_conf.last_update = time(NULL); @@ -818,6 +817,65 @@ int read_slurm_conf(int recover, bool reconfig) return error_code; } +static void _gres_reconig(bool reconfig) +{ + struct node_record *node_ptr; + struct job_record *job_ptr; + ListIterator job_iterator; + int i, i_first, i_last, node_offset; + bool gres_active, gres_changed = false; + char *plugin_names; + + if (reconfig) + gres_plugin_reconfig(&gres_changed); + + plugin_names = slurm_get_gres_plugins(); + if (plugin_names && plugin_names[0]) + gres_active = true; + else + gres_active = false; + xfree(plugin_names); + if (!gres_active) + return; + + /* Clear existing node Gres allocations */ + for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; + i++, node_ptr++) { + gres_plugin_node_state_dealloc(node_ptr->gres_list); + } + + /* Reallocate job gres to the nodes */ + job_iterator = list_iterator_create(job_list); + while ((job_ptr = (struct job_record *) list_next(job_iterator))) { + if (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr)) + continue; + if (job_ptr->job_resrcs == NULL) + continue; + + i_first = bit_ffs(job_ptr->node_bitmap); + i_last = bit_fls(job_ptr->node_bitmap); + if (i_first == -1) + i_last = -2; + node_offset = -1; + for (i = i_first; i <= i_last; i++) { + if (!bit_test(job_ptr->job_resrcs->node_bitmap, i)) + continue; + node_offset++; + if (!bit_test(job_ptr->node_bitmap, i)) + continue; + node_ptr = node_record_table_ptr + i; + gres_plugin_node_state_realloc(job_ptr->gres_list, + node_offset, + node_ptr->gres_list); + } + } + list_iterator_destroy(job_iterator); + + for (i = 0, node_ptr = node_record_table_ptr; i < node_record_count; + i++, node_ptr++) { + gres_plugin_node_state_log(node_ptr->gres_list, node_ptr->name); + } +} /* Restore node state and size information from saved records which match * the node registration message. If a node was re-configured to be down or @@ -874,12 +932,18 @@ static int _restore_node_state(int recover, node_ptr->config_ptr->cpus); } node_ptr->cpus = old_node_ptr->cpus; - node_ptr->sockets = old_node_ptr->sockets; node_ptr->cores = old_node_ptr->cores; + node_ptr->sockets = old_node_ptr->sockets; node_ptr->threads = old_node_ptr->threads; node_ptr->real_memory = old_node_ptr->real_memory; node_ptr->tmp_disk = old_node_ptr->tmp_disk; node_ptr->weight = old_node_ptr->weight; + + if (node_ptr->gres_list) + list_destroy(node_ptr->gres_list); + node_ptr->gres_list = old_node_ptr->gres_list; + old_node_ptr->gres_list = NULL; + if (node_ptr->reason == NULL) { /* Recover only if not explicitly set in slurm.conf */ node_ptr->reason = old_node_ptr->reason; @@ -1335,8 +1399,8 @@ static int _sync_nodes_to_active_job(struct job_record *job_ptr) cnt++; } else if (IS_NODE_IDLE(node_ptr)) { cnt++; - node_ptr->node_state = - NODE_STATE_ALLOCATED | node_flags; + node_ptr->node_state = NODE_STATE_ALLOCATED | + node_flags; } } return cnt; -- GitLab