diff --git a/src/plugins/select/bluegene/bg_job_place.c b/src/plugins/select/bluegene/bg_job_place.c index cc3c25fbee5848ca5518d2ef83fcd42048797042..de3dbaf086f7cb51eac80363f58e236b5b9a020e 100644 --- a/src/plugins/select/bluegene/bg_job_place.c +++ b/src/plugins/select/bluegene/bg_job_place.c @@ -932,20 +932,20 @@ static int _find_best_block_match(List block_list, goto end_it; if (req_geometry[0] != 0 && req_geometry[0] != (uint16_t)NO_VAL) { - char tmp_char[SYSTEM_DIMENSIONS+1]; + char tmp_geo[SYSTEM_DIMENSIONS+1]; target_size = 1; for (i=0; i<SYSTEM_DIMENSIONS; i++) { target_size *= req_geometry[i]; - tmp_char[i] = alpha_num[req_geometry[i]]; + tmp_geo[i] = alpha_num[req_geometry[i]]; } - tmp_char[i] = '\0'; + tmp_geo[i] = '\0'; if (target_size != min_nodes) { debug2("min_nodes not set correctly %u " "should be %u from %s", min_nodes, target_size, - tmp_char); + tmp_geo); min_nodes = target_size; } if (!req_nodes) diff --git a/src/plugins/select/bluegene/bg_read_config.c b/src/plugins/select/bluegene/bg_read_config.c index d7f29a527ab714d83f880d92f23a64d0d892157e..b9f11b05ddf8e96480d9d3497a11c9b3ebc37b6e 100644 --- a/src/plugins/select/bluegene/bg_read_config.c +++ b/src/plugins/select/bluegene/bg_read_config.c @@ -594,6 +594,12 @@ extern int read_bg_conf(void) "in bluegene.conf"); } +#ifdef HAVE_BGQ + /* You can only have 16 ionodes per midplane */ + if (bg_conf->ionodes_per_mp > bg_conf->mp_nodecard_cnt) + bg_conf->ionodes_per_mp = bg_conf->mp_nodecard_cnt; +#endif + if (bg_conf->ionodes_per_mp) { bitstr_t *tmp_bitmap = NULL; int small_size = 1; @@ -627,6 +633,7 @@ extern int read_bg_conf(void) else bg_conf->smallest_block=128; #else + if (bg_conf->io_ratio >= 2) bg_conf->smallest_block=16; else if (bg_conf->io_ratio == 1) diff --git a/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc b/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc index 74fcebefcfbf5871ff1cf5c7fb0403bd664b4edc..ba48dcc52bdc514907b98949d8f1a77bfe68e1cc 100644 --- a/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc +++ b/src/plugins/select/bluegene/bl_bgq/bridge_linker.cc @@ -57,19 +57,28 @@ static void _setup_ba_mp(ComputeHardware::ConstPtr bgq, ba_mp_t *ba_mp) // int i; Midplane::Coordinates coords = {{ba_mp->coord[A], ba_mp->coord[X], ba_mp->coord[Y], ba_mp->coord[Z]}}; - Midplane::ConstPtr mp_ptr = bgq->getMidplane(coords); + Midplane::ConstPtr mp_ptr; int i; + try { + mp_ptr = bgq->getMidplane(coords); + } catch (const bgsched::InputException& err) { + int rc = bridge_handle_input_errors( + "ComputeHardware::getMidplane", + err.getError().toValue(), NULL); + if (rc != SLURM_SUCCESS) + return; + } + + ba_mp->loc = xstrdup(mp_ptr->getLocation().c_str()); + ba_mp->nodecard_loc = (char **)xmalloc(sizeof(char *) * bg_conf->mp_nodecard_cnt); for (i=0; i<bg_conf->mp_nodecard_cnt; i++) { NodeBoard::ConstPtr nodeboard = mp_ptr->getNodeBoard(i); ba_mp->nodecard_loc[i] = xstrdup(nodeboard->getLocation().c_str()); - info("%d is %s", i, ba_mp->nodecard_loc[i]); } - - ba_mp->loc = xstrdup(mp_ptr->getLocation().c_str()); } #endif @@ -292,27 +301,29 @@ extern int bridge_block_create(bg_record_t *bg_record) #ifdef HAVE_BG_FILES if (bg_record->node_cnt < bg_conf->mp_node_cnt) { bool use_nc[bg_conf->mp_nodecard_cnt]; - int i, nc_pos = 0; - int ionode_card = 0, nc_count = 0; - static int num_ncards = 0; - - if (!num_ncards) { - num_ncards = - bg_record->node_cnt/bg_conf->nodecard_node_cnt; - assert(num_ncards >= 1); + int i, nc_pos = 0, num_ncards = 0; + + num_ncards = bg_record->node_cnt/bg_conf->nodecard_node_cnt; + if (num_ncards < 1) { + error("You have to have at least 1 nodecard to make " + "a small block I got %d/%d = %d", + bg_record->node_cnt, bg_conf->nodecard_node_cnt, + num_ncards); + return SLURM_ERROR; } memset(use_nc, 0, sizeof(use_nc)); /* find out how many nodecards to get for each ionode */ - for(i = 0; i<bg_conf->ionodes_per_mp; i++) { + for (i = 0; i<bg_conf->ionodes_per_mp; i++) { if (bit_test(bg_record->ionode_bitmap, i)) { - int j=0; - for(j=0; j<bg_conf->nc_ratio; j++) + for (int j=0; j<bg_conf->nc_ratio; j++) use_nc[nc_pos+j] = 1; } nc_pos += bg_conf->nc_ratio; } - + // char tmp_char[256]; + // format_node_name(bg_record, tmp_char, sizeof(tmp_char)); + // info("creating %s %s", bg_record->bg_block_id, tmp_char); ba_mp = (ba_mp_t *)list_peek(bg_record->ba_mp_list); /* Since the nodeboard locations aren't set up in the copy of this pointer we need to go out a get the @@ -320,10 +331,10 @@ extern int bridge_block_create(bg_record_t *bg_record) */ ba_mp = coord2ba_mp(ba_mp->coord); for (i=0; i<bg_conf->mp_nodecard_cnt; i++) { - if (!use_nc[i]) - continue; - nodecards.push_back(ba_mp->nodecard_loc[i]); + if (use_nc[i]) + nodecards.push_back(ba_mp->nodecard_loc[i]); } + try { block_ptr = Block::create(nodecards); } catch (const bgsched::InputException& err) { @@ -574,9 +585,8 @@ extern int bridge_block_add_user(bg_record_t *bg_record, char *user_name) if (rc != SLURM_SUCCESS) return rc; } catch(...) { - // FIXME: this should do something, but for now we won't -// error("Remove block request failed ... continuing."); -// rc = SLURM_ERROR; + error("Add block user request failed ... continuing."); + rc = SLURM_ERROR; } #endif return rc; @@ -591,14 +601,26 @@ extern int bridge_block_remove_user(bg_record_t *bg_record, char *user_name) if (!bg_record || !bg_record->bg_block_id || !user_name) return SLURM_ERROR; - info("removing user %s from block %s", user_name, bg_record->bg_block_id); + info("removing user %s from block %s", + user_name, bg_record->bg_block_id); #ifdef HAVE_BG_FILES try { Block::removeUser(bg_record->bg_block_id, user_name); + } catch (const bgsched::InputException& err) { + rc = bridge_handle_input_errors("Block::removeUser", + err.getError().toValue(), + bg_record); + if (rc != SLURM_SUCCESS) + return rc; + } catch (const bgsched::RuntimeException& err) { + rc = bridge_handle_runtime_errors("Block::removeUser", + err.getError().toValue(), + bg_record); + if (rc != SLURM_SUCCESS) + return rc; } catch(...) { - // FIXME: this should do something, but for now we won't - // error("Remove block request failed ... continuing."); - // rc = REMOVE_USER_ERR; + error("Remove block user request failed ... continuing."); + rc = REMOVE_USER_ERR; } #endif return rc; @@ -657,12 +679,6 @@ extern int bridge_block_set_owner(bg_record_t *bg_record, char *user_name) return rc; } -extern int bridge_block_get_and_set_mps(bg_record_t *bg_record) -{ - - return SLURM_ERROR; -} - extern int bridge_blocks_load_curr(List curr_block_list) { int rc = SLURM_SUCCESS; @@ -719,7 +735,7 @@ extern int bridge_blocks_load_curr(List curr_block_list) bg_record->job_running = NO_JOB_RUNNING; if (block_ptr->isSmall()) { char bitstring[BITSIZE]; - int io_cnt, io_start; + int io_cnt, io_start, len; Block::NodeBoards nodeboards = block_ptr->getNodeBoards(); int nb_cnt = nodeboards.size(); @@ -731,10 +747,9 @@ extern int bridge_blocks_load_curr(List curr_block_list) /* From the first nodecard id we can figure out where to start from with the alloc of ionodes. */ - io_start = atoi((char*)nb_name.c_str()+1) + len = nb_name.length()-2; + io_start = atoi((char*)nb_name.c_str()+len) * bg_conf->io_ratio; - info("got nb name of %s %d %d", - nb_name.c_str(), io_start, nb_cnt); bg_record->ionode_bitmap = bit_alloc(bg_conf->ionodes_per_mp); diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index 9b611483cc0011190c28d62c6f28e66763c349e9..dbbe5696e2c5083b63dafaadb79dc6446ca47f50 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -657,9 +657,15 @@ extern int init(void) we don't want to read the config or anything like that. */ _set_bg_lists(); - if (!bg_conf) + if (!bg_conf) { bg_conf = xmalloc(sizeof(bg_config_t)); - + /* set some defaults for most systems */ + bg_conf->mp_node_cnt = 512; + bg_conf->quarter_node_cnt = 128; + bg_conf->nodecard_node_cnt = 32; + bg_conf->mp_nodecard_cnt = bg_conf->mp_node_cnt + / bg_conf->nodecard_node_cnt; + } xfree(bg_conf->slurm_user_name); xfree(bg_conf->slurm_node_prefix); slurm_conf_lock();