Skip to content
Snippets Groups Projects
Commit 0233a59e authored by Danny Auble's avatar Danny Auble
Browse files

can delete multiple partitions in parallel now

parent a5de2644
No related branches found
No related tags found
No related merge requests found
......@@ -68,10 +68,13 @@ typedef struct bgl_update {
pm_partition_id_t bgl_part_id;
} bgl_update_t;
List bgl_update_list = NULL;
static List bgl_update_list = NULL;
static pthread_mutex_t agent_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;
static pthread_mutex_t freed_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;
static int agent_cnt = 0;
static int num_part_to_free = 0;
static int num_part_freed = 0;
static void _bgl_list_del(void *x);
static int _excise_block(List block_list,
......@@ -79,6 +82,7 @@ static int _excise_block(List block_list,
char *nodes);
static List _get_all_blocks(void);
static void * _part_agent(void *args);
static void * _mult_free_part(void *args);
static void _part_op(bgl_update_t *bgl_update_ptr);
static int _remove_job(db_job_id_t job_id);
static void _start_agent(bgl_update_t *bgl_update_ptr);
......@@ -219,9 +223,12 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
bgl_record_t *found_record = NULL;
char *user_name = uid_to_string(bgl_update_ptr->uid);
ListIterator itr;
pthread_attr_t attr_agent;
pthread_t thread_agent;
int retries;
bgl_record = find_bgl_record(bgl_update_ptr->bgl_part_id);
if(!bgl_record) {
error("partition %s not found in bgl_list",
bgl_update_ptr->bgl_part_id);
......@@ -235,20 +242,37 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
}
if(bgl_record->state == RM_PARTITION_FREE) {
num_part_to_free = 0;
num_part_freed = 0;
itr = list_iterator_create(bgl_list);
if(bgl_record->full_partition) {
debug("Using full partition freeing all others");
while ((found_record = (bgl_record_t*)
list_next(itr)) != NULL) {
if(found_record->state != RM_PARTITION_FREE) {
if (!found_record->full_partition) {
debug("destroying the "
"partition %s.",
found_record->
bgl_part_id);
bgl_free_partition(
found_record);
slurm_attr_init(&attr_agent);
if (pthread_attr_setdetachstate(
&attr_agent,
PTHREAD_CREATE_JOINABLE))
error("pthread_attr_setdetach"
"state error %m");
retries = 0;
while (pthread_create(&thread_agent,
&attr_agent,
_mult_free_part,
(void *)
found_record)) {
error("pthread_create "
"error %m");
if (++retries
> MAX_PTHREAD_RETRIES)
fatal("Can't create "
"pthread");
/* sleep and retry */
usleep(1000);
}
num_part_to_free++;
}
}
} else {
......@@ -269,7 +293,9 @@ static void _start_agent(bgl_update_t *bgl_update_ptr)
}
}
list_iterator_destroy(itr);
while(num_part_to_free != num_part_freed)
usleep(1000);
if((rc = boot_part(bgl_record,
bgl_update_ptr->node_use))
!= SLURM_SUCCESS) {
......@@ -438,6 +464,21 @@ static void *_part_agent(void *args)
return NULL;
}
/*
 * Thread entry point used to free several partitions in parallel.
 * IN args - pointer to the bgl_record_t of the partition to free
 * RET always NULL
 *
 * After bgl_free_partition() returns, num_part_freed is incremented while
 * holding freed_cnt_mutex so the spawning code can compare it against
 * num_part_to_free.
 */
static void *_mult_free_part(void *args)
{
	bgl_record_t *part_rec = args;

	debug("destroying the partition %s.", part_rec->bgl_part_id);
	bgl_free_partition(part_rec);

	/* report completion of this partition */
	slurm_mutex_lock(&freed_cnt_mutex);
	++num_part_freed;
	slurm_mutex_unlock(&freed_cnt_mutex);

	return NULL;
}
/* Perform an operation upon a BGL partition (block) for starting or
* terminating a job */
static void _part_op(bgl_update_t *bgl_update_ptr)
......
......@@ -29,16 +29,24 @@
/* NOTE(review): the code using these two is outside this hunk; presumably the
 * maximum number of state polls and the delay between them - confirm. */
#define MAX_POLL_RETRIES 110
#define POLL_INTERVAL 3
/* How many times a failed pthread_create() is retried before fatal() */
#define MAX_PTHREAD_RETRIES 1
/* Globals */
char *bgl_part_id = NULL;
int all_parts = 0;
#ifdef HAVE_BGL_FILES
static int num_part_to_free = 0;
static int num_part_freed = 0;
static pthread_mutex_t freed_cnt_mutex = PTHREAD_MUTEX_INITIALIZER;
/************
* Functions *
************/
#ifdef HAVE_BGL_FILES
static int _free_partition(char *bgl_part_id);
......@@ -47,6 +55,21 @@ static void _term_jobs_on_part(char *bgl_part_id);
static char *_bgl_err_str(status_t inx);
static int _remove_job(db_job_id_t job_id);
/*
 * Thread entry point used to free several partitions in parallel.
 * IN args - partition id string (char *) of the partition to free
 * RET always NULL
 *
 * After _free_partition() returns, num_part_freed is incremented while
 * holding freed_cnt_mutex so main() can compare it against
 * num_part_to_free.
 */
static void *_mult_free_part(void *args)
{
	/* local name kept distinct from the file-scope bgl_part_id */
	char *part_id = args;

	debug("destroying the partition %s.", part_id);
	_free_partition(part_id);

	/* report completion of this partition */
	slurm_mutex_lock(&freed_cnt_mutex);
	++num_part_freed;
	slurm_mutex_unlock(&freed_cnt_mutex);

	return NULL;
}
int main(int argc, char *argv[])
{
log_options_t opts = LOG_OPTS_STDERR_ONLY;
......@@ -55,7 +78,10 @@ int main(int argc, char *argv[])
int j, num_parts = 0;
rm_partition_t *part_ptr = NULL;
int rc;
pthread_attr_t attr_agent;
pthread_t thread_agent;
int retries;
log_init(xbasename(argv[0]), opts, SYSLOG_FACILITY_DAEMON, NULL);
parse_command_line(argc, argv);
if(!all_parts) {
......@@ -67,7 +93,8 @@ int main(int argc, char *argv[])
} else {
if ((rc = rm_get_partitions_info(part_state, &part_list))
!= STATUS_OK) {
error("rm_get_partitions_info(): %s", _bgl_err_str(rc));
error("rm_get_partitions_info(): %s",
_bgl_err_str(rc));
return -1;
}
......@@ -113,13 +140,40 @@ int main(int argc, char *argv[])
}
if(strncmp("RMP", bgl_part_id, 3))
continue;
_free_partition(bgl_part_id);
slurm_attr_init(&attr_agent);
if (pthread_attr_setdetachstate(
&attr_agent,
PTHREAD_CREATE_JOINABLE))
error("pthread_attr_setdetach"
"state error %m");
retries = 0;
while (pthread_create(&thread_agent,
&attr_agent,
_mult_free_part,
(void *)
bgl_part_id)) {
error("pthread_create "
"error %m");
if (++retries
> MAX_PTHREAD_RETRIES)
fatal("Can't create "
"pthread");
/* sleep and retry */
usleep(1000);
}
num_part_to_free++;
}
if ((rc = rm_free_partition_list(part_list)) != STATUS_OK) {
error("rm_free_partition_list(): %s",
_bgl_err_str(rc));
}
}
while(num_part_to_free != num_part_freed) {
info("waiting for all partitions to free...");
sleep(1);
}
return 0;
}
......@@ -132,7 +186,7 @@ static int _free_partition(char *bgl_part_id)
_term_jobs_on_part(bgl_part_id);
while (1) {
if((state = _update_bgl_record_state(bgl_part_id))
== SLURM_ERROR)
== -1)
break;
if (state != RM_PARTITION_FREE
&& state != RM_PARTITION_DEALLOCATING) {
......@@ -164,7 +218,7 @@ static int _update_bgl_record_state(char *bgl_part_id)
char *name = NULL;
rm_partition_list_t *part_list = NULL;
int j, rc, num_parts = 0;
rm_partition_state_t state = -1;
rm_partition_state_t state = -2;
rm_partition_t *part_ptr = NULL;
if ((rc = rm_get_partitions_info(part_state, &part_list))
......@@ -179,7 +233,7 @@ static int _update_bgl_record_state(char *bgl_part_id)
state = -1;
num_parts = 0;
}
for (j=0; j<num_parts; j++) {
if (j) {
if ((rc = rm_get_data(part_list,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment