diff --git a/src/plugins/burst_buffer/common/burst_buffer_common.c b/src/plugins/burst_buffer/common/burst_buffer_common.c index 2ec7bd61361f79cafb988533fa6c6716b462c000..2b3cb4588398b027bda7efb1aaaa1cf449e48c22 100644 --- a/src/plugins/burst_buffer/common/burst_buffer_common.c +++ b/src/plugins/burst_buffer/common/burst_buffer_common.c @@ -83,8 +83,6 @@ static pthread_mutex_t proc_count_mutex = PTHREAD_MUTEX_INITIALIZER; static void _bb_job_del2(bb_job_t *bb_job); static uid_t * _parse_users(char *buf); -static int _persist_match(void *x, void *key); -static void _persist_purge(void *x); static char * _print_users(uid_t *buf); /* Translate comma delimitted list of users into a UID array, @@ -1015,7 +1013,8 @@ extern bb_alloc_t *bb_alloc_job(bb_state_t *state_ptr, bb_alloc_t *bb_alloc; bb_alloc = bb_alloc_job_rec(state_ptr, job_ptr, bb_job); - bb_limit_add(bb_alloc->user_id, bb_alloc->size, state_ptr); + bb_limit_add(bb_alloc->user_id, bb_alloc->size, bb_alloc->pool, + state_ptr); return bb_alloc; } @@ -1254,82 +1253,6 @@ extern char *bb_run_script(char *script_type, char *script_path, return resp; } -static void _persist_purge(void *x) -{ - xfree(x); -} - -static int _persist_match(void *x, void *key) -{ - bb_pend_persist_t *bb_pers_exist = (bb_pend_persist_t *) x; - bb_pend_persist_t *bb_pers_test = (bb_pend_persist_t *) key; - if (bb_pers_exist->job_id == bb_pers_test->job_id) - return 1; - return 0; -} - -/* Add persistent burst buffer reservation for this job, tests for duplicate */ -extern void bb_add_persist(bb_state_t *state_ptr, - bb_pend_persist_t *bb_persist) -{ - bb_pend_persist_t *bb_pers_match; - - xassert(state_ptr); - if (!state_ptr->persist_resv_rec) { - state_ptr->persist_resv_rec = list_create(_persist_purge); - } else { - bb_pers_match = list_find_first(state_ptr->persist_resv_rec, - _persist_match, bb_persist); - if (bb_pers_match) - return; - } - - bb_pers_match = xmalloc(sizeof(bb_pend_persist_t)); - bb_pers_match->job_id = bb_persist->job_id; - bb_pers_match->persist_add = bb_persist->persist_add; - list_append(state_ptr->persist_resv_rec, bb_pers_match); - state_ptr->persist_resv_sz += bb_persist->persist_add; -} - -/* Remove persistent burst buffer reservation for this job. - * Call when job starts running or removed from pending state. */ -extern void bb_rm_persist(bb_state_t *state_ptr, uint32_t job_id) -{ - bb_pend_persist_t bb_persist; - bb_pend_persist_t *bb_pers_match; - - xassert(state_ptr); - if (!state_ptr->persist_resv_rec) - return; - bb_persist.job_id = job_id; - bb_pers_match = list_find_first(state_ptr->persist_resv_rec, - _persist_match, &bb_persist); - if (!bb_pers_match) - return; - if (state_ptr->persist_resv_sz >= bb_pers_match->persist_add) { - state_ptr->persist_resv_sz -= bb_pers_match->persist_add; - } else { - state_ptr->persist_resv_sz = 0; - error("%s: Reserved persistent storage size underflow", - __func__); - } -} - -/* Return true of the identified job has burst buffer space already reserved */ -extern bool bb_test_persist(bb_state_t *state_ptr, uint32_t job_id) -{ - bb_pend_persist_t bb_pers_match; - - xassert(state_ptr); - if (!state_ptr->persist_resv_rec) - return false; - bb_pers_match.job_id = job_id; - if (list_find_first(state_ptr->persist_resv_rec, _persist_match, - &bb_pers_match)) - return true; - return false; -} - /* Allocate a bb_job_t record, hashed by job_id, delete with bb_job_del() */ extern bb_job_t *bb_job_alloc(bb_state_t *state_ptr, uint32_t job_id) { @@ -1453,12 +1376,26 @@ extern void bb_job_log(bb_state_t *state_ptr, bb_job_t *bb_job) } /* Make claim against resource limit for a user */ -extern void bb_limit_add( - uint32_t user_id, uint64_t bb_size, bb_state_t *state_ptr) +extern void bb_limit_add(uint32_t user_id, uint64_t bb_size, char *pool, + bb_state_t *state_ptr) { + burst_buffer_gres_t *gres_ptr; bb_user_t *bb_user; + int i; - state_ptr->used_space += bb_size; + if (!pool || !xstrcmp(pool, state_ptr->bb_config.default_pool)) { + state_ptr->used_space += bb_size; + } else { + gres_ptr = state_ptr->bb_config.gres_ptr; + for (i = 0; i < state_ptr->bb_config.gres_cnt; i++, gres_ptr++){ + if (xstrcmp(pool, gres_ptr->name)) + continue; + gres_ptr->used_cnt += bb_size; + break; + } + if (i >= state_ptr->bb_config.gres_cnt) + error("%s: Unable to located pool %s", __func__, pool); + } bb_user = bb_find_user_rec(user_id, state_ptr); xassert(bb_user); @@ -1467,16 +1404,36 @@ extern void bb_limit_add( } /* Release claim against resource limit for a user */ -extern void bb_limit_rem( - uint32_t user_id, uint64_t bb_size, bb_state_t *state_ptr) +extern void bb_limit_rem(uint32_t user_id, uint64_t bb_size, char *pool, + bb_state_t *state_ptr) { + burst_buffer_gres_t *gres_ptr; bb_user_t *bb_user; + int i; - if (state_ptr->used_space >= bb_size) { - state_ptr->used_space -= bb_size; + if (!pool || !xstrcmp(pool, state_ptr->bb_config.default_pool)) { + if (state_ptr->used_space >= bb_size) { + state_ptr->used_space -= bb_size; + } else { + error("%s: used_space underflow", __func__); + state_ptr->used_space = 0; + } } else { - error("%s: used_space underflow", __func__); - state_ptr->used_space = 0; + gres_ptr = state_ptr->bb_config.gres_ptr; + for (i = 0; i < state_ptr->bb_config.gres_cnt; i++, gres_ptr++){ + if (xstrcmp(pool, gres_ptr->name)) + continue; + if (gres_ptr->used_cnt >= bb_size) { + gres_ptr->used_cnt -= bb_size; + } else { + error("%s: used_cnt underflow for pool %s", + __func__, pool); + gres_ptr->used_cnt = 0; + } + break; + } + if (i >= state_ptr->bb_config.gres_cnt) + error("%s: Unable to located pool %s", __func__, pool); } bb_user = bb_find_user_rec(user_id, state_ptr); diff --git a/src/plugins/burst_buffer/common/burst_buffer_common.h b/src/plugins/burst_buffer/common/burst_buffer_common.h index f8ca08347d8752249ec2d8463ee04e85a8bbf650..e49338c361777f8611c9b73b14e70846e0401a7b 100644 --- a/src/plugins/burst_buffer/common/burst_buffer_common.h +++ b/src/plugins/burst_buffer/common/burst_buffer_common.h @@ -174,12 +174,6 @@ typedef struct bb_job { uint32_t user_id; /* user the job runs as */ } bb_job_t; -/* Persistent buffer requests which are pending */ -typedef struct { - uint32_t job_id; - uint64_t persist_add; /* Persistent buffer space job adds, bytes */ -} bb_pend_persist_t; - /* Used for building queue of jobs records for various purposes */ typedef struct bb_job_queue_rec { uint64_t bb_size; /* Used by generic plugin only */ @@ -219,10 +213,6 @@ typedef struct bb_state { uint64_t used_space; /* units are bytes */ } bb_state_t; -/* Add persistent burst buffer reservation for this job, tests for duplicate */ -extern void bb_add_persist(bb_state_t *state_ptr, - bb_pend_persist_t *bb_persist); - /* Allocate burst buffer hash tables */ extern void bb_alloc_cache(bb_state_t *state_ptr); @@ -327,10 +317,6 @@ extern int bb_preempt_queue_sort(void *x, void *y); /* Return count of child processes */ extern int bb_proc_count(void); -/* Remove persistent burst buffer reservation for this job. - * Call when job starts running or removed from pending state. */ -extern void bb_rm_persist(bb_state_t *state_ptr, uint32_t job_id); - /* Set the bb_state's tres_pos for limit enforcement. * Value is set to -1 if not found. */ extern void bb_set_tres_pos(bb_state_t *state_ptr); @@ -345,9 +331,6 @@ extern void bb_shutdown(void); /* Sleep function, also handles termination signal */ extern void bb_sleep(bb_state_t *state_ptr, int add_secs); -/* Return true of the identified job has burst buffer space already reserved */ -extern bool bb_test_persist(bb_state_t *state_ptr, uint32_t job_id); - /* Execute a script, wait for termination and return its stdout. * script_type IN - Type of program being run (e.g. "StartStageIn") * script_path IN - Fully qualified pathname of the program to execute @@ -360,12 +343,12 @@ extern char *bb_run_script(char *script_type, char *script_path, char **script_argv, int max_wait, int *status); /* Make claim against resource limit for a user */ -extern void bb_limit_add( - uint32_t user_id, uint64_t bb_size, bb_state_t *state_ptr); +extern void bb_limit_add(uint32_t user_id, uint64_t bb_size, char *pool, + bb_state_t *state_ptr); /* Release claim against resource limit for a user */ -extern void bb_limit_rem( - uint32_t user_id, uint64_t bb_size, bb_state_t *state_ptr); +extern void bb_limit_rem(uint32_t user_id, uint64_t bb_size, char *pool, + bb_state_t *state_ptr); /* Log creation of a persistent burst buffer in the database * job_ptr IN - Point to job that created, could be NULL at startup diff --git a/src/plugins/burst_buffer/cray/burst_buffer_cray.c b/src/plugins/burst_buffer/cray/burst_buffer_cray.c index d3f0fb940779b74fd7c957b2665a7e901134ee3d..81059cfccd3f89c8143a6afdb801fd27b207b846 100644 --- a/src/plugins/burst_buffer/cray/burst_buffer_cray.c +++ b/src/plugins/burst_buffer/cray/burst_buffer_cray.c @@ -655,8 +655,8 @@ static void _apply_limits(void) bb_alloc = bb_state.bb_ahash[i]; while (bb_alloc) { _set_assoc_mgr_ptrs(bb_alloc); - bb_limit_add(bb_alloc->user_id, - bb_alloc->size, &bb_state); + bb_limit_add(bb_alloc->user_id, bb_alloc->size, + bb_alloc->pool, &bb_state); bb_alloc = bb_alloc->next; } } @@ -1175,8 +1175,8 @@ static void _load_state(bool init_config) if (!init_config) { /* Newly found buffer */ _pick_alloc_account(bb_alloc); - bb_limit_add(bb_alloc->user_id, - bb_alloc->size, &bb_state); + bb_limit_add(bb_alloc->user_id, bb_alloc->size, + bb_alloc->pool, &bb_state); } if (bb_alloc->job_id == 0) bb_post_persist_create(NULL, bb_alloc, &bb_state); @@ -1831,8 +1831,8 @@ static void *_start_teardown(void *x) _purge_bb_files(teardown_args->job_id, job_ptr); if (job_ptr) { if ((bb_alloc = bb_find_alloc_rec(&bb_state, job_ptr))){ - bb_limit_rem(bb_alloc->user_id, - bb_alloc->size, &bb_state); + bb_limit_rem(bb_alloc->user_id, bb_alloc->size, + bb_alloc->pool, &bb_state); (void) bb_free_alloc_rec(&bb_state, bb_alloc); } if ((bb_job = _get_bb_job(job_ptr))) @@ -1847,8 +1847,8 @@ static void *_start_teardown(void *x) teardown_args->user_id, &bb_state); if (bb_alloc) { - bb_limit_rem(bb_alloc->user_id, - bb_alloc->size, &bb_state); + bb_limit_rem(bb_alloc->user_id, bb_alloc->size, + bb_alloc->pool, &bb_state); (void) bb_free_alloc_rec(&bb_state, bb_alloc); } @@ -2156,8 +2156,8 @@ static void _timeout_bb_rec(void) "purged", __func__, bb_alloc->job_id); } - bb_limit_rem(bb_alloc->user_id, - bb_alloc->size, &bb_state); + bb_limit_rem(bb_alloc->user_id, bb_alloc->size, + bb_alloc->pool, &bb_state); bb_post_persist_delete(bb_alloc, &bb_state); *bb_pptr = bb_alloc->next; bb_free_alloc_buf(bb_alloc); @@ -3577,20 +3577,19 @@ static int _create_bufs(struct job_record *job_ptr, bb_job_t *bb_job, continue; } rc++; - bb_limit_add(job_ptr->user_id, - buf_ptr->size, &bb_state); + if (!buf_ptr->pool) { + buf_ptr->pool = + xstrdup(bb_state.bb_config.default_pool); + } + bb_limit_add(job_ptr->user_id, buf_ptr->size, + buf_ptr->pool, &bb_state); bb_job->state = BB_STATE_ALLOCATING; buf_ptr->state = BB_STATE_ALLOCATING; create_args = xmalloc(sizeof(create_buf_data_t)); create_args->access = xstrdup(buf_ptr->access); create_args->job_id = job_ptr->job_id; create_args->name = xstrdup(buf_ptr->name); - if (buf_ptr->pool) { - create_args->pool = xstrdup(buf_ptr->pool); - } else { - create_args->pool = - xstrdup(bb_state.bb_config.default_pool); - } + create_args->pool = xstrdup(buf_ptr->pool); create_args->size = buf_ptr->size; create_args->type = xstrdup(buf_ptr->type); create_args->user_id = job_ptr->user_id; @@ -3744,11 +3743,15 @@ static void _reset_buf_state(uint32_t user_id, uint32_t job_id, char *name, old_state = buf_ptr->state; buf_ptr->state = new_state; if ((old_state == BB_STATE_ALLOCATING) && - (new_state == BB_STATE_PENDING)) - bb_limit_rem(user_id, buf_ptr->size, &bb_state); + (new_state == BB_STATE_PENDING)) { + bb_limit_rem(user_id, buf_ptr->size, buf_ptr->pool, + &bb_state); + } if ((old_state == BB_STATE_DELETING) && - (new_state == BB_STATE_PENDING)) - bb_limit_rem(user_id, buf_ptr->size, &bb_state); + (new_state == BB_STATE_PENDING)) { + bb_limit_rem(user_id, buf_ptr->size, buf_ptr->pool, + &bb_state); + } if ((old_state == BB_STATE_ALLOCATING) && (new_state == BB_STATE_ALLOCATED) && ((name[0] < '0') || (name[0] > '9'))) { @@ -4015,8 +4018,8 @@ static void *_destroy_persistent(void *x) bb_alloc->state = BB_STATE_COMPLETE; bb_alloc->job_id = destroy_args->job_id; bb_alloc->state_time = time(NULL); - bb_limit_rem(bb_alloc->user_id, - bb_alloc->size, &bb_state); + bb_limit_rem(bb_alloc->user_id, bb_alloc->size, + bb_alloc->pool, &bb_state); assoc_mgr_lock(&assoc_locks); (void) bb_post_persist_delete(bb_alloc, &bb_state);