From e74016fc2ac5519a1454899cc95a2d09151dc58d Mon Sep 17 00:00:00 2001 From: "Christopher J. Morrone" <morrone2@llnl.gov> Date: Thu, 28 Apr 2005 23:04:40 +0000 Subject: [PATCH] Fix the federation save/restore code. Remove the temporary fed_libstate_t structs used in switch_p_libstate_[save|restore], and therefore eliminate the fed_libstate_t copies. This required some rearranging of code to put make the locking safe. --- src/plugins/switch/federation/federation.c | 143 ++++++++++-------- src/plugins/switch/federation/federation.h | 8 +- .../switch/federation/switch_federation.c | 31 +--- 3 files changed, 87 insertions(+), 95 deletions(-) diff --git a/src/plugins/switch/federation/federation.c b/src/plugins/switch/federation/federation.c index f9ab0c4a505..530668c011e 100644 --- a/src/plugins/switch/federation/federation.c +++ b/src/plugins/switch/federation/federation.c @@ -826,7 +826,7 @@ _copy_node(fed_nodeinfo_t *dest, fed_nodeinfo_t *src) assert(src); assert(dest->magic == FED_NODEINFO_MAGIC); assert(src->magic == FED_NODEINFO_MAGIC); - + strncpy(dest->name, src->name, FED_HOSTLEN); dest->adapter_count = src->adapter_count; for(i = 0; i < dest->adapter_count; i++) { @@ -1017,6 +1017,8 @@ _print_libstate(const fed_libstate_t *l) printf("--Begin libstate--\n"); printf(" magic = %u\n", l->magic); printf(" node_count = %u\n", l->node_count); + printf(" node_max = %u\n", l->node_max); + printf(" hash_max = %u\n", l->hash_max); for(i = 0; i < l->node_count; i++) { memset(buf, 0, 3000); fed_print_nodeinfo(&l->node_list[i], buf, 3000); @@ -1025,12 +1027,13 @@ _print_libstate(const fed_libstate_t *l) printf("--End libstate--\n"); } #endif + /* Unpack nodeinfo and update persistent libstate. * * Used by: slurmctld */ -int -fed_unpack_nodeinfo(fed_nodeinfo_t *n, Buf buf) +static int +_unpack_nodeinfo(fed_nodeinfo_t *n, Buf buf) { int i, j; fed_adapter_t *tmp_a = NULL; @@ -1048,7 +1051,6 @@ fed_unpack_nodeinfo(fed_nodeinfo_t *n, Buf buf) /* Extract node name from buffer and update global libstate * with this nodes' info. */ - _lock(); safe_unpack32(&magic, buf); if(magic != FED_NODEINFO_MAGIC) slurm_seterrno_ret(EBADMAGIC_FEDNODEINFO); @@ -1084,26 +1086,37 @@ fed_unpack_nodeinfo(fed_nodeinfo_t *n, Buf buf) /* Only copy the node_info structure if the caller wants it */ if(n != NULL) - if(_copy_node(n, tmp_n) != SLURM_SUCCESS) { - _unlock(); + if(_copy_node(n, tmp_n) != SLURM_SUCCESS) return SLURM_ERROR; - } #if FED_DEBUG _print_libstate(fed_state); #endif - _unlock(); return SLURM_SUCCESS; unpack_error: - _unlock(); /* FIX ME! Add code here to free allocated memory */ if(tmp_w) free(tmp_w); slurm_seterrno_ret(EUNPACK); } +/* Unpack nodeinfo and update persistent libstate. + * + * Used by: slurmctld + */ +int +fed_unpack_nodeinfo(fed_nodeinfo_t *n, Buf buf) +{ + int rc; + + _lock(); + rc = _unpack_nodeinfo(n, buf); + _unlock(); + return rc; +} + /* Used by: slurmd, slurmctld */ static void _free_adapter(fed_adapter_t *a) @@ -1622,25 +1635,25 @@ fed_unload_table(fed_jobinfo_t *jp) return SLURM_SUCCESS; } -int -fed_alloc_libstate(fed_libstate_t **l) +static fed_libstate_t * +_alloc_libstate(void) { fed_libstate_t *tmp; - assert(l); - tmp = (fed_libstate_t *)malloc(sizeof(fed_libstate_t)); - if(!tmp) - slurm_seterrno_ret(ENOMEM); + if(!tmp) { + slurm_seterrno(ENOMEM); + return NULL; + } tmp->magic = FED_LIBSTATE_MAGIC; tmp->node_count = 0; tmp->node_max = 0; tmp->node_list = NULL; tmp->hash_max = 0; tmp->hash_table = NULL; - *l = tmp; + tmp->key_index = 1; - return SLURM_SUCCESS; + return tmp; } /* Used by: slurmctld */ @@ -1658,7 +1671,7 @@ _copy_libstate(fed_libstate_t *dest, fed_libstate_t *src) assert(dest->node_count == 0); _lock(); - /* note: dest->node_count set by alloc_node */ + /* note: dest->node_count set by _alloc_node */ for(i = 0; i < src->node_count; i++) { tmp = _alloc_node(dest, NULL); err = _copy_node(tmp, &src->node_list[i]); @@ -1671,74 +1684,44 @@ _copy_libstate(fed_libstate_t *dest, fed_libstate_t *src) _unlock(); } -/* Needs to be called before any using any functions that - * manipulate persistent libstate. +/* Allocate and initialize memory for the persistent libstate. * * Used by: slurmctld */ int -fed_init(fed_libstate_t *l) +fed_init(void) { - int err; fed_libstate_t *tmp; - assert(!fed_state); - - err = fed_alloc_libstate(&tmp); - if(err != SLURM_SUCCESS) - return err; - if(l) { - assert(l->magic == FED_LIBSTATE_MAGIC); - _copy_libstate(tmp, l); - - } else { - tmp->node_count = 0; - tmp->node_max = 0; - tmp->node_list = NULL; - tmp->hash_max = 0; - tmp->hash_table = NULL; - tmp->key_index = 1; - } + tmp = _alloc_libstate(); + if(!tmp) + return SLURM_FAILURE; _lock(); + assert(!fed_state); fed_state = tmp; _unlock(); return SLURM_SUCCESS; } -/* Used by: slurmctld */ -void -fed_free_libstate(fed_libstate_t *lp) +static void +_free_libstate(fed_libstate_t *lp) { int i; if(!lp) return; - _lock(); for(i = 0; i < lp->node_count; i++) fed_free_nodeinfo(&lp->node_list[i]); if(lp->hash_table != NULL) free(lp->hash_table); free(lp); - _unlock(); } -/* Used by: slurmctld */ -void -fed_destroy(fed_libstate_t *lp) -{ - assert(fed_state); - - if(lp) - _copy_libstate(lp, fed_state); - if(fed_state) - fed_free_libstate(fed_state); - fed_state = NULL; -} /* Used by: slurmctld */ -int -fed_pack_libstate(fed_libstate_t *lp, Buf buffer) +static int +_pack_libstate(fed_libstate_t *lp, Buf buffer) { int offset; int i; @@ -1749,9 +1732,8 @@ fed_pack_libstate(fed_libstate_t *lp, Buf buffer) offset = get_buf_offset(buffer); pack32(lp->magic, buffer); pack32(lp->node_count, buffer); - for(i = 0; i < lp->node_count; i++) { + for(i = 0; i < lp->node_count; i++) (void)fed_pack_nodeinfo(&lp->node_list[i], buffer); - } /* don't pack hash_table, we'll just rebuild on restore */ pack16(lp->key_index, buffer); @@ -1759,10 +1741,21 @@ fed_pack_libstate(fed_libstate_t *lp, Buf buffer) } /* Used by: slurmctld */ -int -fed_unpack_libstate(fed_libstate_t *lp, Buf buffer) +void +fed_libstate_save(Buf buffer) +{ + _lock(); + _pack_libstate(fed_state, buffer); + _free_libstate(fed_state); + _unlock(); +} + +/* Used by: slurmctld */ +static int +_unpack_libstate(fed_libstate_t *lp, Buf buffer) { int offset; + int node_count; int i; assert(lp->magic == FED_LIBSTATE_MAGIC); @@ -1770,10 +1763,10 @@ fed_unpack_libstate(fed_libstate_t *lp, Buf buffer) offset = get_buf_offset(buffer); safe_unpack32(&lp->magic, buffer); - safe_unpack32(&lp->node_count, buffer); - for(i = 0; i < lp->node_count; i ++) { - (void)fed_unpack_nodeinfo(NULL, buffer); - } + safe_unpack32(&node_count, buffer); + for(i = 0; i < node_count; i++) + (void)_unpack_nodeinfo(NULL, buffer); + assert(lp->node_count == node_count); safe_unpack16(&lp->key_index, buffer); return SLURM_SUCCESS; @@ -1783,3 +1776,21 @@ unpack_error: return SLURM_ERROR; } +/* Used by: slurmctld */ +int +fed_libstate_restore(Buf buffer) +{ + int err; + + _lock(); + assert(!fed_state); + + fed_state = _alloc_libstate(); + if(!fed_state) + return SLURM_FAILURE; + _unpack_libstate(fed_state, buffer); + _unlock(); + + return SLURM_SUCCESS; +} + diff --git a/src/plugins/switch/federation/federation.h b/src/plugins/switch/federation/federation.h index 50ca7cb2e39..87ff81d6ac3 100644 --- a/src/plugins/switch/federation/federation.h +++ b/src/plugins/switch/federation/federation.h @@ -80,14 +80,12 @@ int fed_unpack_jobinfo(fed_jobinfo_t *jp, Buf buf); fed_jobinfo_t *fed_copy_jobinfo(fed_jobinfo_t *jp); void fed_free_jobinfo(fed_jobinfo_t *jp); int fed_load_table(fed_jobinfo_t *jp, int uid, int pid); -int fed_init(fed_libstate_t *lp); +int fed_init(void); void fed_init_cache(void); int fed_unload_table(fed_jobinfo_t *jp); -int fed_pack_libstate(fed_libstate_t *lp, Buf buffer); int fed_unpack_libstate(fed_libstate_t *lp, Buf buffer); -int fed_alloc_libstate(fed_libstate_t **lh); int fed_get_jobinfo(fed_jobinfo_t *jp, int key, void *data); -void fed_destroy(fed_libstate_t *lp); -void fed_free_libstate(fed_libstate_t *lp); +void fed_libstate_save(Buf buffer); +int fed_libstate_restore(Buf buffer); #endif /* _FEDERATION_INCLUDED */ diff --git a/src/plugins/switch/federation/switch_federation.c b/src/plugins/switch/federation/switch_federation.c index 81a9472a4c9..e345776a9c1 100644 --- a/src/plugins/switch/federation/switch_federation.c +++ b/src/plugins/switch/federation/switch_federation.c @@ -132,19 +132,14 @@ int fini ( void ) int switch_p_libstate_save ( char * dir_name ) { int err; - fed_libstate_t *old_state = NULL; Buf buffer; char *file_name; int ret = SLURM_SUCCESS; int state_fd; - err = fed_alloc_libstate(&old_state); - if(err) - return SLURM_ERROR; - fed_destroy(old_state); buffer = init_buf(FED_LIBSTATE_LEN); - (void)fed_pack_libstate(old_state, buffer); + (void)fed_libstate_save(buffer); file_name = xstrdup(dir_name); xstrcat(file_name, "/fed_state"); (void)unlink(file_name); @@ -176,8 +171,6 @@ int switch_p_libstate_save ( char * dir_name ) if(buffer) free_buf(buffer); - if(old_state) - fed_free_libstate(old_state); return ret; } @@ -185,13 +178,12 @@ int switch_p_libstate_save ( char * dir_name ) int switch_p_libstate_restore ( char * dir_name ) { char *data = NULL, *file_name; - fed_libstate_t *old_state = NULL; Buf buffer = NULL; int error_code = SLURM_SUCCESS; int state_fd, data_allocated = 0, data_read = 0, data_size = 0; if (dir_name == NULL) /* clean start, no recovery */ - return fed_init(NULL); + return fed_init(); file_name = xstrdup(dir_name); xstrcat(file_name, "/fed_state"); @@ -220,29 +212,20 @@ int switch_p_libstate_restore ( char * dir_name ) error("No %s file for Federation state recovery", file_name); error("Starting Federation with clean state"); xfree(file_name); - return fed_init(NULL); + return fed_init(); } if (error_code == SLURM_SUCCESS) { - if (fed_alloc_libstate(&old_state)) { - error_code = SLURM_ERROR; - } else { - buffer = create_buf (data, data_size); - data = NULL; /* now in buffer, don't xfree() */ - if (fed_unpack_libstate(old_state, buffer) < 0) - error_code = SLURM_ERROR; - } + buffer = create_buf (data, data_size); + data = NULL; /* now in buffer, don't xfree() */ + if (fed_libstate_restore(buffer) < 0) + error_code = SLURM_ERROR; } if (buffer) free_buf(buffer); xfree(data); - if (error_code == SLURM_SUCCESS) - error_code = fed_init(old_state); - if (old_state) - fed_free_libstate(old_state); - return error_code; } -- GitLab