From 2238ce392d611818a2e2a96647946cae32efde7d Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Mon, 7 Jun 2010 16:22:59 +0000 Subject: [PATCH] added debug flags for the select plug --- slurm/slurm.h.in | 7 + src/common/read_config.c | 37 +- .../block_allocator/block_allocator.c | 673 ++++++++++-------- .../block_allocator/block_allocator.h | 1 + .../select/bluegene/plugin/bg_job_place.c | 314 ++++---- .../bluegene/plugin/bg_record_functions.c | 59 +- .../bluegene/plugin/bg_switch_connections.c | 56 +- src/plugins/select/bluegene/plugin/bluegene.c | 120 ++-- src/plugins/select/bluegene/plugin/bluegene.h | 1 + .../select/bluegene/plugin/dynamic_block.c | 141 ++-- .../select/bluegene/plugin/select_bluegene.c | 2 + 11 files changed, 855 insertions(+), 556 deletions(-) diff --git a/slurm/slurm.h.in b/slurm/slurm.h.in index ab692e0b2bb..eff5ca75771 100644 --- a/slurm/slurm.h.in +++ b/slurm/slurm.h.in @@ -1291,6 +1291,13 @@ typedef struct reservation_name_msg { #define DEBUG_FLAG_NO_CONF_HASH 0x00000020 /* no warning about slurm.conf * files checksum mismatch */ #define DEBUG_FLAG_GRES 0x00000040 /* Generic Resource info */ +#define DEBUG_FLAG_BG_PICK 0x00000080 /* debug for bluegene + * picking blocks */ +#define DEBUG_FLAG_BG_WIRES 0x00000100 /* debug for bluegene wires */ +#define DEBUG_FLAG_BG_ALGO 0x00000200 /* debug for bluegene algo */ +#define DEBUG_FLAG_BG_ALGO_DEEP 0x00000400 /* debug for bluegene + * algo deep */ +#define DEBUG_FLAG_PRIO 0x00000800 /* debug for priority plugin */ #define GROUP_FORCE 0x8000 /* if set, update group membership diff --git a/src/common/read_config.c b/src/common/read_config.c index 0553e488018..08f4308d2b5 100644 --- a/src/common/read_config.c +++ b/src/common/read_config.c @@ -2774,6 +2774,26 @@ extern char * debug_flags2str(uint32_t debug_flags) { char *rc = NULL; + if (debug_flags & DEBUG_FLAG_BG_ALGO) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "BGBlockAlgo"); + } + if (debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) { + 
if (rc) + xstrcat(rc, ","); + xstrcat(rc, "BGBlockAlgoDeep"); + } + if (debug_flags & DEBUG_FLAG_BG_PICK) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "BGBlockPick"); + } + if (debug_flags & DEBUG_FLAG_BG_WIRES) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "BGBlockWires"); + } if (debug_flags & DEBUG_FLAG_CPU_BIND) { if (rc) xstrcat(rc, ","); @@ -2789,6 +2809,11 @@ extern char * debug_flags2str(uint32_t debug_flags) xstrcat(rc, ","); xstrcat(rc, "NO_CONF_HASH"); } + if (debug_flags & DEBUG_FLAG_PRIO) { + if (rc) + xstrcat(rc, ","); + xstrcat(rc, "Priority"); + } if (debug_flags & DEBUG_FLAG_SELECT_TYPE) { if (rc) xstrcat(rc, ","); @@ -2829,12 +2854,22 @@ extern uint32_t debug_str2flags(char *debug_flags) tmp_str = xstrdup(debug_flags); tok = strtok_r(tmp_str, ",", &last); while (tok) { - if (strcasecmp(tok, "CPU_Bind") == 0) + if (strcasecmp(tok, "BGBlockAlgo") == 0) + rc |= DEBUG_FLAG_BG_ALGO; + else if (strcasecmp(tok, "BGBlockAlgoDeep") == 0) + rc |= DEBUG_FLAG_BG_ALGO_DEEP; + else if (strcasecmp(tok, "BGBlockPick") == 0) + rc |= DEBUG_FLAG_BG_PICK; + else if (strcasecmp(tok, "BGBlockWires") == 0) + rc |= DEBUG_FLAG_BG_WIRES; + else if (strcasecmp(tok, "CPU_Bind") == 0) rc |= DEBUG_FLAG_CPU_BIND; else if (strcasecmp(tok, "Gres") == 0) rc = DEBUG_FLAG_GRES; else if (strcasecmp(tok, "NO_CONF_HASH") == 0) rc |= DEBUG_FLAG_NO_CONF_HASH; + else if (strcasecmp(tok, "Priority") == 0) + rc |= DEBUG_FLAG_PRIO; else if (strcasecmp(tok, "SelectType") == 0) rc |= DEBUG_FLAG_SELECT_TYPE; else if (strcasecmp(tok, "Steps") == 0) diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index 7558c0d49f0..ab0883c72ce 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -75,6 +75,7 @@ int cluster_base = 36; #endif uint32_t cluster_flags = 0; char *p = '\0'; +uint32_t ba_debug_flags = 0; /* extern Global */ 
my_bluegene_t *bg = NULL; @@ -178,7 +179,7 @@ static void _switch_config(ba_node_t* source, ba_node_t* target, int dim, /* */ static int _set_external_wires(int dim, int count, ba_node_t* source, - ba_node_t* target); + ba_node_t* target); /* */ static char *_set_internal_wires(List nodes, int size, int conn_type); @@ -357,7 +358,8 @@ extern int parse_image(void **dest, slurm_parser_enum_t type, n = xmalloc(sizeof(image_t)); n->name = xstrdup(value); n->def = false; - debug3("image %s", n->name); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("image %s", n->name); n->groups = list_create(destroy_image_group_list); s_p_get_string(&tmp, "Groups", tbl); if(tmp) { @@ -368,9 +370,11 @@ extern int parse_image(void **dest, slurm_parser_enum_t type, snprintf(image_group->name, (i-j)+1, "%s", tmp+j); gid_from_string (image_group->name, - &image_group->gid); - debug3("adding group %s %d", image_group->name, - image_group->gid); + &image_group->gid); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("adding group %s %d", + image_group->name, + image_group->gid); list_append(n->groups, image_group); j=i; j++; @@ -385,9 +389,10 @@ extern int parse_image(void **dest, slurm_parser_enum_t type, fatal("Invalid bluegene.conf parameter " "Groups=%s", image_group->name); - else - debug3("adding group %s %d", image_group->name, - image_group->gid); + else if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("adding group %s %d", + image_group->name, + image_group->gid); list_append(n->groups, image_group); } xfree(tmp); @@ -484,7 +489,7 @@ extern int new_ba_request(ba_request_t* ba_request) if(!(cluster_flags & CLUSTER_FLAG_BG)) { if(geo[X] != NO_VAL) { - for (i=0; i<cluster_dims; i++){ + for (i=0; i<cluster_dims; i++) { if ((geo[i] < 1) || (geo[i] > DIM_SIZE[i])) { error("new_ba_request Error, " "request geometry is invalid %d", @@ -569,21 +574,22 @@ extern int new_ba_request(ba_request_t* ba_request) } sz = ba_request->size % (DIM_SIZE[Y] * DIM_SIZE[Z]); if(!sz) { - i = ba_request->size / 
(DIM_SIZE[Y] * DIM_SIZE[Z]); - geo[X] = i; - geo[Y] = DIM_SIZE[Y]; - geo[Z] = DIM_SIZE[Z]; - sz=ba_request->size; - if((geo[X]*geo[Y]*geo[Z]) == ba_request->size) - _append_geo(geo, - ba_request->elongate_geos, - ba_request->rotate); - else - error("%d I was just trying to add a " - "geo of %d%d%d " - "while I am trying to request %d midplanes", - __LINE__, geo[X], geo[Y], geo[Z], - ba_request->size); + i = ba_request->size / (DIM_SIZE[Y] * DIM_SIZE[Z]); + geo[X] = i; + geo[Y] = DIM_SIZE[Y]; + geo[Z] = DIM_SIZE[Z]; + sz=ba_request->size; + if((geo[X]*geo[Y]*geo[Z]) == ba_request->size) + _append_geo(geo, + ba_request->elongate_geos, + ba_request->rotate); + else + error("%d I was just trying to add a " + "geo of %d%d%d " + "while I am trying to request " + "%d midplanes", + __LINE__, geo[X], geo[Y], geo[Z], + ba_request->size); } // startagain: picked=0; @@ -739,7 +745,7 @@ extern int new_ba_request(ba_request_t* ba_request) else error("%d I was just trying to add a geo of %d%d%d " "while I am trying to request %d midplanes", - __LINE__, geo[X], geo[Y], geo[Z], + __LINE__, geo[X], geo[Y], geo[Z], ba_request->size); /* see if We can find a cube or square root of the @@ -839,7 +845,7 @@ extern void print_ba_request(ba_request_t* ba_request) /** * empty a list that we don't want to destroy the memory of the * elements always returns 1 -*/ + */ extern int empty_null_destroy_list(void *arg, void *key) { return 1; @@ -869,14 +875,15 @@ extern void ba_init(node_info_msg_t *node_info_ptr, bool sanity_check) int rc = 0; #endif /* HAVE_BG_FILES */ - cluster_dims = slurmdb_setup_cluster_dims(); - cluster_flags = slurmdb_setup_cluster_flags(); - /* We only need to initialize once, so return if already done so. 
*/ if (_initialized) { return; } + cluster_dims = slurmdb_setup_cluster_dims(); + cluster_flags = slurmdb_setup_cluster_flags(); + set_ba_debug_flags(slurm_get_debug_flags()); + #ifdef HAVE_BG_FILES bridge_init(); #endif @@ -1043,8 +1050,8 @@ node_info_error: } if ((bg != NULL) - && ((rc = bridge_get_data(bg, RM_Msize, &bp_size)) - == STATUS_OK)) { + && ((rc = bridge_get_data(bg, RM_Msize, &bp_size)) + == STATUS_OK)) { verbose("BlueGene configured with " "%d x %d x %d base blocks", bp_size.X, bp_size.Y, bp_size.Z); @@ -1182,6 +1189,10 @@ extern void ba_fini() // debug3("pa system destroyed"); } +extern void set_ba_debug_flags(uint32_t debug_flags) +{ + ba_debug_flags = debug_flags; +} /* * set the node in the internal configuration as in, or not in use, @@ -1271,10 +1282,11 @@ extern int copy_node_path(List nodes, List *dest_nodes) list_iterator_destroy(itr2); if(!new_ba_node) { - debug3("adding %c%c%c as a new node", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("adding %c%c%c as a new node", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); new_ba_node = ba_copy_node(ba_node); _new_ba_node(new_ba_node, ba_node->coord, false); list_push(*dest_nodes, new_ba_node); @@ -1490,12 +1502,14 @@ extern int check_and_set_node_list(List nodes) node_flags = curr_ba_node->state & NODE_STATE_FLAGS; if (!(node_flags & (NODE_STATE_DRAIN | NODE_STATE_FAIL)) && (base_state != NODE_STATE_DOWN)) { - debug4("I have already been to " - "this node %c%c%c %s", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]], - node_state_string(curr_ba_node->state)); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("I have already been to " + "this node %c%c%c %s", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]], + node_state_string( + curr_ba_node->state)); rc = 
SLURM_ERROR; goto end_it; } @@ -1513,17 +1527,22 @@ extern int check_and_set_node_list(List nodes) if(ba_switch->int_wire[j].used && curr_ba_switch->int_wire[j].used - && j != curr_ba_switch-> + && j != curr_ba_switch-> int_wire[j].port_tar) { - debug4("%c%c%c dim %d port %d " - "is already in use to %d", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]], - i, - j, - curr_ba_switch-> - int_wire[j].port_tar); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("%c%c%c dim %d port %d " + "is already in use to %d", + alpha_num[ba_node-> + coord[X]], + alpha_num[ba_node-> + coord[Y]], + alpha_num[ba_node-> + coord[Z]], + i, + j, + curr_ba_switch-> + int_wire[j].port_tar); rc = SLURM_ERROR; goto end_it; } @@ -1593,7 +1612,7 @@ extern char *set_bg_block(List results, int *start, alpha_num[geometry[Z]]); return NULL; } - //info("looking at %d%d%d", geometry[X], geometry[Y], geometry[Z]); + //info("looking at %d%d%d", geometry[X], geometry[Y], geometry[Z]); size = geometry[X] * geometry[Y] * geometry[Z]; ba_node = &ba_system_ptr->grid[start[X]][start[Y]][start[Z]]; } @@ -1618,11 +1637,12 @@ extern char *set_bg_block(List results, int *start, if(ba_node->letter == '.') { ba_node->letter = letters[color_count%62]; ba_node->color = colors[color_count%6]; - debug4("count %d setting letter = %c " - "color = %d", - color_count, - ba_node->letter, - ba_node->color); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("count %d setting letter = %c " + "color = %d", + color_count, + ba_node->letter, + ba_node->color); color_count++; } goto end_it; @@ -2186,7 +2206,8 @@ extern int load_block_wiring(char *bg_block_id) ba_switch_t *ba_switch = NULL; int *geo = NULL; - debug3("getting info for block %s\n", bg_block_id); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("getting info for block %s\n", bg_block_id); if ((rc = bridge_get_block(bg_block_id, &block_ptr)) != STATUS_OK) { error("bridge_get_block(%s): %s", @@ -2202,7 +2223,8 @@ 
extern int load_block_wiring(char *bg_block_id) return SLURM_ERROR; } if(!switch_cnt) { - debug4("no switch_cnt"); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("no switch_cnt"); if ((rc = bridge_get_data(block_ptr, RM_PartitionFirstBP, &curr_bp)) @@ -2275,8 +2297,9 @@ extern int load_block_wiring(char *bg_block_id) bg_err_str(rc)); return SLURM_ERROR; } - debug3("switch id = %s dim %d conns = %d", - switchid, dim, cnt); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("switch id = %s dim %d conns = %d", + switchid, dim, cnt); ba_switch = &ba_system_ptr-> grid[geo[X]][geo[Y]][geo[Z]].axis_switch[dim]; for (j=0; j<cnt; j++) { @@ -2288,7 +2311,7 @@ extern int load_block_wiring(char *bg_block_id) != STATUS_OK) { error("bridge_get_data: " "RM_SwitchNextConnection: %s", - bg_err_str(rc)); + bg_err_str(rc)); return SLURM_ERROR; } } else { @@ -2348,8 +2371,9 @@ extern int load_block_wiring(char *bg_block_id) ba_system_ptr->grid[geo[X]][geo[Y]][geo[Z]]. used = true; } - debug4("connection going from %d -> %d", - curr_conn.p1, curr_conn.p2); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("connection going from %d -> %d", + curr_conn.p1, curr_conn.p2); if(ba_switch->int_wire[curr_conn.p1].used) { debug("%c%c%c dim %d port %d " @@ -2412,7 +2436,8 @@ extern List get_and_set_block_wiring(char *bg_block_id, List results = list_create(destroy_ba_node); ListIterator itr = NULL; - debug3("getting info for block %s\n", bg_block_id); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("getting info for block %s\n", bg_block_id); if ((rc = bridge_get_data(block_ptr, RM_PartitionSwitchNum, &switch_cnt)) != STATUS_OK) { @@ -2421,7 +2446,8 @@ extern List get_and_set_block_wiring(char *bg_block_id, goto end_it; } if(!switch_cnt) { - debug4("no switch_cnt"); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("no switch_cnt"); if ((rc = bridge_get_data(block_ptr, RM_PartitionFirstBP, &curr_bp)) @@ -2500,8 +2526,9 @@ extern List get_and_set_block_wiring(char *bg_block_id, 
bg_err_str(rc)); goto end_it; } - debug3("switch id = %s dim %d conns = %d", - switchid, dim, cnt); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("switch id = %s dim %d conns = %d", + switchid, dim, cnt); itr = list_iterator_create(results); while((ba_node = list_next(itr))) { @@ -2529,7 +2556,7 @@ extern List get_and_set_block_wiring(char *bg_block_id, != STATUS_OK) { error("bridge_get_data: " "RM_SwitchNextConnection: %s", - bg_err_str(rc)); + bg_err_str(rc)); goto end_it; } } else { @@ -2587,8 +2614,9 @@ extern List get_and_set_block_wiring(char *bg_block_id, } ba_node->used = true; } - debug4("connection going from %d -> %d", - curr_conn.p1, curr_conn.p2); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("connection going from %d -> %d", + curr_conn.p1, curr_conn.p2); if(ba_switch->int_wire[curr_conn.p1].used) { debug("%c%c%c dim %d port %d " @@ -2650,14 +2678,15 @@ extern int validate_coord(int *coord) if(coord[X]>=DIM_SIZE[X] || coord[Y]>=DIM_SIZE[Y] || coord[Z]>=DIM_SIZE[Z]) { - debug4("got coord %c%c%c greater than what we are using " - "%c%c%c", - alpha_num[coord[X]], - alpha_num[coord[Y]], - alpha_num[coord[Z]], - alpha_num[DIM_SIZE[X]], - alpha_num[DIM_SIZE[Y]], - alpha_num[DIM_SIZE[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("got coord %c%c%c greater than what we are using " + "%c%c%c", + alpha_num[coord[X]], + alpha_num[coord[Y]], + alpha_num[coord[Z]], + alpha_num[DIM_SIZE[X]], + alpha_num[DIM_SIZE[Y]], + alpha_num[DIM_SIZE[Z]]); return 0; } #endif @@ -2816,9 +2845,10 @@ static int _append_geo(int *geometry, List geos, int rotate) geo[X] = geometry[X]; geo[Y] = geometry[Y]; geo[Z] = geometry[Z]; - debug4("adding geo %c%c%c", - alpha_num[geo[X]], alpha_num[geo[Y]], - alpha_num[geo[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("adding geo %c%c%c", + alpha_num[geo[X]], alpha_num[geo[Y]], + alpha_num[geo[Z]]); list_append(geos, geo); } return 1; @@ -2880,10 +2910,15 @@ static int _fill_in_coords(List results, List 
start_list, continue; if (!_node_used(ba_node, geometry[X])) { - debug4("here Adding %c%c%c", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("here Adding %c%c%c", + alpha_num[ba_node-> + coord[X]], + alpha_num[ba_node-> + coord[Y]], + alpha_num[ba_node-> + coord[Z]]); list_append(results, ba_node); next_switch = &ba_node->axis_switch[X]; @@ -2924,7 +2959,7 @@ static int _fill_in_coords(List results, List start_list, debug("We don't allow Y passthoughs"); rc = 0; } else if((*deny_pass & PASS_DENY_Z) - && (*deny_pass & PASS_FOUND_Z)) { + && (*deny_pass & PASS_FOUND_Z)) { debug("We don't allow Z passthoughs"); rc = 0; } @@ -2984,16 +3019,17 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch, node_curr = curr_switch->ext_wire[0].node_tar; node_tar = curr_switch->ext_wire[port_tar].node_tar; if(mark_switch->int_wire[source].used) - debug3("setting dim %d %c%c%c %d-> %c%c%c %d", - dim, - alpha_num[node_curr[X]], - alpha_num[node_curr[Y]], - alpha_num[node_curr[Z]], - source, - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]], - port_tar); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("setting dim %d %c%c%c %d-> %c%c%c %d", + dim, + alpha_num[node_curr[X]], + alpha_num[node_curr[Y]], + alpha_num[node_curr[Z]], + source, + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]], + port_tar); if(port_tar == 1) { /* found the end of the line */ @@ -3052,10 +3088,11 @@ static int _copy_the_path(List nodes, ba_switch_t *curr_switch, [mark_node_tar[Z]]); _new_ba_node(ba_node, mark_node_tar, false); list_push(nodes, ba_node); - debug4("haven't seen %c%c%c adding it", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("haven't seen %c%c%c adding it", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + 
alpha_num[ba_node->coord[Z]]); } next_mark_switch = &ba_node->axis_switch[dim]; @@ -3078,11 +3115,12 @@ static int _find_yz_path(ba_node_t *ba_node, int *first, for(i2=1;i2<=2;i2++) { if(geometry[i2] > 1) { - debug4("%d node %c%c%c port 2 -> ", - i2, - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("%d node %c%c%c port 2 -> ", + i2, + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); dim_curr_switch = &ba_node->axis_switch[i2]; if(dim_curr_switch->int_wire[2].used) { @@ -3095,13 +3133,15 @@ static int _find_yz_path(ba_node_t *ba_node, int *first, next_node = &ba_system_ptr-> grid[node_tar[X]][node_tar[Y]][node_tar[Z]]; dim_next_switch = &next_node->axis_switch[i2]; - debug4("%c%c%c port 5", - alpha_num[next_node->coord[X]], - alpha_num[next_node->coord[Y]], - alpha_num[next_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("%c%c%c port 5", + alpha_num[next_node->coord[X]], + alpha_num[next_node->coord[Y]], + alpha_num[next_node->coord[Z]]); if(dim_next_switch->int_wire[5].used) { - debug3("returning here 2"); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("returning here 2"); return 0; } debug5("%d %d %d %d",i2, node_tar[i2], @@ -3144,16 +3184,20 @@ static int _find_yz_path(ba_node_t *ba_node, int *first, PASS_FOUND_Z; } while(node_tar[i2] != first[i2]) { - debug4("on dim %d at %d " - "looking for %d", - i2, - node_tar[i2], - first[i2]); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("on dim %d at %d " + "looking for %d", + i2, + node_tar[i2], + first[i2]); if(dim_curr_switch-> int_wire[2].used) { - debug4("returning " - "here 3"); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("returning " + "here 3"); return 0; } @@ -3182,11 +3226,13 @@ static int _find_yz_path(ba_node_t *ba_node, int *first, axis_switch[i2]; } - debug4("back to first on dim %d " - "at %d looking for %d", - i2, 
- node_tar[i2], - first[i2]); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("back to first on dim %d " + "at %d looking for %d", + i2, + node_tar[i2], + first[i2]); dim_curr_switch-> int_wire[5].used = 1; @@ -3253,11 +3299,12 @@ static int _find_yz_path(ba_node_t *ba_node, int *first, */ dim_curr_switch = &ba_node->axis_switch[i2]; - debug4("%d node %c%c%c port 0 -> 1", - i2, - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("%d node %c%c%c port 0 -> 1", + i2, + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); dim_curr_switch->int_wire[0].used = 1; dim_curr_switch->int_wire[0].port_tar = 1; dim_curr_switch->int_wire[1].used = 1; @@ -3341,9 +3388,10 @@ static int _reset_the_path(ba_switch_t *curr_switch, int source, /*set the switch to not be used */ if(!curr_switch->int_wire[source].used) { /* This means something overlapping the removing block - already cleared this, or the path just never was - complete in the first place. */ - debug3("I reached the end, the source isn't used"); + already cleared this, or the path just never was + complete in the first place. 
*/ + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("I reached the end, the source isn't used"); return 1; } curr_switch->int_wire[source].used = 0; @@ -3406,7 +3454,7 @@ static void _new_ba_node(ba_node_t *ba_node, int *coord, bool track_down_nodes) uint16_t node_base_state = ba_node->state & NODE_STATE_BASE; if(((node_base_state != NODE_STATE_DOWN) - && !(ba_node->state & NODE_STATE_DRAIN)) || !track_down_nodes) + && !(ba_node->state & NODE_STATE_DRAIN)) || !track_down_nodes) ba_node->used = false; for (i=0; i<cluster_dims; i++){ @@ -3530,28 +3578,31 @@ start_again: x = startx-1; while(x!=startx) { x++; - debug4("finding %c%c%c try %d", - alpha_num[ba_request->geometry[X]], - alpha_num[ba_request->geometry[Y]], - alpha_num[ba_request->geometry[Z]], - x); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("finding %c%c%c try %d", + alpha_num[ba_request->geometry[X]], + alpha_num[ba_request->geometry[Y]], + alpha_num[ba_request->geometry[Z]], + x); new_node: - debug3("starting at %c%c%c", - alpha_num[start[X]], - alpha_num[start[Y]], - alpha_num[start[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("starting at %c%c%c", + alpha_num[start[X]], + alpha_num[start[Y]], + alpha_num[start[Z]]); ba_node = &ba_system_ptr->grid[start[X]][start[Y]][start[Z]]; if (!_node_used(ba_node, ba_request->geometry[X])) { - debug4("trying this node %c%c%c %c%c%c %d", - alpha_num[start[X]], - alpha_num[start[Y]], - alpha_num[start[Z]], - alpha_num[ba_request->geometry[X]], - alpha_num[ba_request->geometry[Y]], - alpha_num[ba_request->geometry[Z]], - ba_request->conn_type); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("trying this node %c%c%c %c%c%c %d", + alpha_num[start[X]], + alpha_num[start[Y]], + alpha_num[start[Z]], + alpha_num[ba_request->geometry[X]], + alpha_num[ba_request->geometry[Y]], + alpha_num[ba_request->geometry[Z]], + ba_request->conn_type); name = set_bg_block(results, start, ba_request->geometry, @@ -3622,10 +3673,11 @@ static bool 
_node_used(ba_node_t* ba_node, int x_size) ba_switch_t* ba_switch = NULL; /* if we've used this node in another block already */ if (!ba_node || ba_node->used) { - debug4("node %c%c%c used", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("node %c%c%c used", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); return true; } /* Check If we've used this node's switches completely in another @@ -3644,10 +3696,11 @@ static bool _node_used(ba_node_t* ba_node, int x_size) other they must be connected to the other ports. */ if(ba_switch->int_wire[3].used && ba_switch->int_wire[5].used) { - debug4("switch full in the X dim on node %c%c%c!", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("switch full in the X dim on node %c%c%c!", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); return true; } } @@ -3834,16 +3887,17 @@ static int _set_external_wires(int dim, int count, ba_node_t* source, _port_enum(from_port), _port_enum(to_port)); - debug3("dim %d from %c%c%c %d -> %c%c%c %d", - dim, - alpha_num[source->coord[X]], - alpha_num[source->coord[Y]], - alpha_num[source->coord[Z]], - _port_enum(from_port), - alpha_num[target->coord[X]], - alpha_num[target->coord[Y]], - alpha_num[target->coord[Z]], - _port_enum(to_port)); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("dim %d from %c%c%c %d -> %c%c%c %d", + dim, + alpha_num[source->coord[X]], + alpha_num[source->coord[Y]], + alpha_num[source->coord[Z]], + _port_enum(from_port), + alpha_num[target->coord[X]], + alpha_num[target->coord[Y]], + alpha_num[target->coord[Z]], + _port_enum(to_port)); } #else _switch_config(source, source, dim, 0, 0); @@ -4129,7 +4183,8 @@ static char *_set_internal_wires(List nodes, int size, int conn_type) 
alpha_num[ba_node[count]->coord[X]], alpha_num[ba_node[count]->coord[Y]], alpha_num[ba_node[count]->coord[Z]]); - debug4("name = %s", temp_name); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("name = %s", temp_name); count++; hostlist_push(hostlist, temp_name); } @@ -4146,11 +4201,12 @@ static char *_set_internal_wires(List nodes, int size, int conn_type) if(ba_node[i]->letter == '.') { ba_node[i]->letter = letters[color_count%62]; ba_node[i]->color = colors[color_count%6]; - debug4("count %d setting letter = %c " - "color = %d", - color_count, - ba_node[i]->letter, - ba_node[i]->color); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("count %d setting letter = %c " + "color = %d", + color_count, + ba_node[i]->letter, + ba_node[i]->color); set=1; } } else { @@ -4243,7 +4299,8 @@ static int _find_x_path(List results, ba_node_t *ba_node, return 0; } - debug4("Algo(%d) found - %d", algo, found); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("Algo(%d) found - %d", algo, found); /* Check the 2 ports we can leave though in ports_to_try */ for(i=0;i<2;i++) { @@ -4283,14 +4340,16 @@ static int _find_x_path(List results, ba_node_t *ba_node, already been before */ itr = list_iterator_create(results); while((next_node = list_next(itr))) { - debug4("Algo(%d) looking at %c%c%c and %c%c%c", - algo, - alpha_num[next_node->coord[X]], - alpha_num[next_node->coord[Y]], - alpha_num[next_node->coord[Z]], - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("Algo(%d) looking at %c%c%c " + "and %c%c%c", + algo, + alpha_num[next_node->coord[X]], + alpha_num[next_node->coord[Y]], + alpha_num[next_node->coord[Z]], + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]]); if((node_tar[X] == next_node->coord[X] && node_tar[Y] == next_node->coord[Y] && node_tar[Z] == next_node->coord[Z])) { @@ -4300,8 +4359,9 @@ static int _find_x_path(List results, ba_node_t *ba_node, } 
list_iterator_destroy(itr); if(not_first && found < DIM_SIZE[X]) { - debug3("Algo(%d) already been there before", - algo); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) already been there " + "before", algo); not_first = 0; continue; } @@ -4314,17 +4374,22 @@ static int _find_x_path(List results, ba_node_t *ba_node, next_switch = &next_node->axis_switch[X]; if((conn_type == SELECT_MESH) && (found == (x_size))) { - debug3("Algo(%d) we found the end of the mesh", - algo); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) we found the end of " + "the mesh", algo); return 1; } - debug4("Algo(%d) Broke = %d Found = %d x_size = %d", - algo, broke, found, x_size); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("Algo(%d) Broke = %d Found = %d " + "x_size = %d", + algo, broke, found, x_size); if(broke && (found == x_size)) { goto found_path; } else if(found == x_size) { - debug3("Algo(%d) finishing the torus!", algo); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) finishing the torus!", + algo); if(deny_pass && (*deny_pass & PASS_DENY_X)) { info("we don't allow passthroughs 1"); @@ -4346,9 +4411,10 @@ static int _find_x_path(List results, ba_node_t *ba_node, curr_switch, 0, X, 0, start); if(best_count < BEST_COUNT_INIT) { - debug3("Algo(%d) Found a best path " - "with %d steps.", - algo, best_count); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) Found a best path " + "with %d steps.", + algo, best_count); _set_best_path(); return 1; } else { @@ -4360,18 +4426,19 @@ static int _find_x_path(List results, ba_node_t *ba_node, } if (!_node_used(next_node, x_size)) { - debug3("Algo(%d) found %d looking at %c%c%c " - "%d going to %c%c%c %d", - algo, - found, - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]], - ports_to_try[i], - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]], - port_tar); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) found %d 
looking at " + "%c%c%c %d going to %c%c%c %d", + algo, + found, + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]], + ports_to_try[i], + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]], + port_tar); itr = list_iterator_create(results); while((check_node = list_next(itr))) { if((node_tar[X] == check_node->coord[X] @@ -4384,19 +4451,24 @@ static int _find_x_path(List results, ba_node_t *ba_node, } list_iterator_destroy(itr); if(!check_node) { - debug3("Algo(%d) add %c%c%c", - algo, - alpha_num[next_node->coord[X]], - alpha_num[next_node->coord[Y]], - alpha_num[next_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) add %c%c%c", + algo, + alpha_num[next_node-> + coord[X]], + alpha_num[next_node-> + coord[Y]], + alpha_num[next_node-> + coord[Z]]); list_append(results, next_node); } else { - debug3("Algo(%d) Hey this is already " - "added %c%c%c", - algo, - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) Hey this is " + "already added %c%c%c", + algo, + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]]); continue; } found++; @@ -4410,19 +4482,24 @@ static int _find_x_path(List results, ba_node_t *ba_node, continue; } else { found_path: - debug3("Algo(%d) added node %c%c%c " - "%d %d -> %c%c%c %d %d", - algo, - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]], - source_port, - ports_to_try[i], - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]], - port_tar, - target_port); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) added node " + "%c%c%c %d %d -> " + "%c%c%c %d %d", + algo, + alpha_num[ba_node-> + coord[X]], + alpha_num[ba_node-> + coord[Y]], + alpha_num[ba_node-> + coord[Z]], + source_port, + ports_to_try[i], + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + 
alpha_num[node_tar[Z]], + port_tar, + target_port); curr_switch->int_wire[source_port].used = 1; curr_switch->int_wire @@ -4450,15 +4527,17 @@ static int _find_x_path(List results, ba_node_t *ba_node, } if(algo == BLOCK_ALGO_FIRST) { - debug3("Algo(%d) couldn't find path", algo); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) couldn't find path", algo); return 0; } else if(algo == BLOCK_ALGO_SECOND) { - debug3("Algo(%d) looking for the next free node " - "starting at %c%c%c", - algo, - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) looking for the next free node " + "starting at %c%c%c", + algo, + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); if(best_path) list_flush(best_path); @@ -4473,8 +4552,9 @@ static int _find_x_path(List results, ba_node_t *ba_node, _find_next_free_using_port_2(curr_switch, 0, results, X, 0); if(best_count < BEST_COUNT_INIT) { - debug3("Algo(%d) yes found next free %d", algo, - best_count); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) yes found next free %d", algo, + best_count); node_tar = _set_best_path(); if(deny_pass && (*deny_pass & PASS_DENY_X) @@ -4489,16 +4569,17 @@ static int _find_x_path(List results, ba_node_t *ba_node, next_switch = &next_node->axis_switch[X]; - debug3("Algo(%d) found %d looking at %c%c%c " - "going to %c%c%c %d", - algo, found, - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]], - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]], - port_tar); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) found %d looking at %c%c%c " + "going to %c%c%c %d", + algo, found, + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]], + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]], + port_tar); 
list_append(results, next_node); found++; @@ -4510,12 +4591,14 @@ static int _find_x_path(List results, ba_node_t *ba_node, found--; _reset_the_path(curr_switch, 0, 1, X); _remove_node(results, next_node->coord); - debug3("Algo(%d) couldn't finish " - "the path off this one", algo); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) couldn't finish " + "the path off this one", algo); } } - debug3("Algo(%d) couldn't find path", algo); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("Algo(%d) couldn't find path", algo); return 0; } @@ -4536,17 +4619,19 @@ static int _remove_node(List results, int *node_tar) if(node_tar[X] == ba_node->coord[X] && node_tar[Y] == ba_node->coord[Y] && node_tar[Z] == ba_node->coord[Z]) { - debug3("removing %c%c%c from list", - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("removing %c%c%c from list", + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]]); list_remove (itr); break; } #else if(node_tar[X] == ba_node->coord[X]) { - debug3("removing %d from list", - node_tar[X]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("removing %d from list", + node_tar[X]); list_remove (itr); break; } @@ -4603,10 +4688,11 @@ static int _find_next_free_using_port_2(ba_switch_t *curr_switch, if(!broke && count>0 && !ba_system_ptr->grid[node_tar[X]][node_tar[Y]][node_tar[Z]].used) { - debug3("this one not found %c%c%c", - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("this one not found %c%c%c", + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]]); broke = 0; if((source_port%2)) @@ -4680,11 +4766,12 @@ static int _find_next_free_using_port_2(ba_switch_t *curr_switch, path_add->out = port_to_try; list_push(path, path_add); _find_next_free_using_port_2(next_switch, - port_tar, nodes, - dim, count); + port_tar, nodes, + dim, count); 
while((temp_switch = list_pop(path)) != path_add){ xfree(temp_switch); - debug4("something here 1"); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("something here 1"); } } } @@ -4828,14 +4915,20 @@ static int _finish_torus(List results, allow it. */ itr = list_iterator_create(results); while((next_node = list_next(itr))) { - debug4("finishing_torus: " - "looking at %c%c%c and %c%c%c", - alpha_num[next_node->coord[X]], - alpha_num[next_node->coord[Y]], - alpha_num[next_node->coord[Z]], - alpha_num[node_tar[X]], - alpha_num[node_tar[Y]], - alpha_num[node_tar[Z]]); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("finishing_torus: " + "looking at %c%c%c " + "and %c%c%c", + alpha_num[next_node-> + coord[X]], + alpha_num[next_node-> + coord[Y]], + alpha_num[next_node-> + coord[Z]], + alpha_num[node_tar[X]], + alpha_num[node_tar[Y]], + alpha_num[node_tar[Z]]); if((node_tar[X] == next_node->coord[X]) && (node_tar[Y] == next_node->coord[Y]) @@ -4846,12 +4939,17 @@ static int _finish_torus(List results, } list_iterator_destroy(itr); if(next_node) { - debug3("finishing_torus: " - "Can't finish torus with " - "%c%c%c we already were there.", - alpha_num[next_node->coord[X]], - alpha_num[next_node->coord[Y]], - alpha_num[next_node->coord[Z]]); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO) + info("finishing_torus: " + "Can't finish torus with " + "%c%c%c we already were " + "there.", + alpha_num[next_node-> + coord[X]], + alpha_num[next_node-> + coord[Y]], + alpha_num[next_node-> + coord[Z]]); continue; } @@ -4868,13 +4966,15 @@ static int _finish_torus(List results, while((temp_switch = list_pop(path)) != path_add){ xfree(temp_switch); - debug4("something here 3"); + if(ba_debug_flags + & DEBUG_FLAG_BG_ALGO_DEEP) + info("something here 3"); } } } - } - xfree(path_add); - return 0; + } + xfree(path_add); + return 0; } /* @@ -4899,11 +4999,12 @@ static int *_set_best_path() *deny_pass |= PASS_FOUND_X; debug2("got a passthrough in X"); } - debug4("mapping %c%c%c 
%d->%d", - alpha_num[path_switch->geometry[X]], - alpha_num[path_switch->geometry[Y]], - alpha_num[path_switch->geometry[Z]], - path_switch->in, path_switch->out); + if(ba_debug_flags & DEBUG_FLAG_BG_ALGO_DEEP) + info("mapping %c%c%c %d->%d", + alpha_num[path_switch->geometry[X]], + alpha_num[path_switch->geometry[Y]], + alpha_num[path_switch->geometry[Z]], + path_switch->in, path_switch->out); if(!geo) geo = path_switch->geometry; curr_switch = &ba_system_ptr->grid diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.h b/src/plugins/select/bluegene/block_allocator/block_allocator.h index 01801d484c9..ae97ec540cf 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.h +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.h @@ -355,6 +355,7 @@ extern void init_wires(); * destroy all the internal (global) data structs. */ extern void ba_fini(); +extern void set_ba_debug_flags(uint32_t debug_flags); /* * set the node in the internal configuration as in, or not in use, diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index cb2cd144e3b..a8a505b473a 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -49,21 +49,21 @@ #define _DEBUG 0 #define MAX_GROUPS 128 -#define SWAP(a,b,t) \ -_STMT_START { \ - (t) = (a); \ - (a) = (b); \ - (b) = (t); \ -} _STMT_END +#define SWAP(a,b,t) \ + _STMT_START { \ + (t) = (a); \ + (a) = (b); \ + (b) = (t); \ + } _STMT_END pthread_mutex_t create_dynamic_mutex = PTHREAD_MUTEX_INITIALIZER; static void _rotate_geo(uint16_t *req_geometry, int rot_cnt); static int _get_user_groups(uint32_t user_id, uint32_t group_id, - gid_t *groups, int max_groups, int *ngroups); + gid_t *groups, int max_groups, int *ngroups); static int _test_image_perms(char *image_name, List image_list, - struct job_record* job_ptr); + struct job_record* job_ptr); #ifdef HAVE_BGL static int 
_check_images(struct job_record* job_ptr, char **blrtsimage, char **linuximage, @@ -105,16 +105,16 @@ static void _rotate_geo(uint16_t *req_geometry, int rot_cnt) uint16_t tmp; switch (rot_cnt) { - case 0: /* ABC -> ACB */ - case 2: /* CAB -> CBA */ - case 4: /* BCA -> BAC */ - SWAP(req_geometry[Y], req_geometry[Z], tmp); - break; - case 1: /* ACB -> CAB */ - case 3: /* CBA -> BCA */ - case 5: /* BAC -> ABC */ - SWAP(req_geometry[X], req_geometry[Y], tmp); - break; + case 0: /* ABC -> ACB */ + case 2: /* CAB -> CBA */ + case 4: /* BCA -> BAC */ + SWAP(req_geometry[Y], req_geometry[Z], tmp); + break; + case 1: /* ACB -> CAB */ + case 3: /* CBA -> BCA */ + case 5: /* BAC -> ABC */ + SWAP(req_geometry[X], req_geometry[Y], tmp); + break; } } @@ -205,16 +205,16 @@ static int _check_images(struct job_record* job_ptr, char **blrtsimage, char **linuximage, char **mloaderimage, char **ramdiskimage) #else -static int _check_images(struct job_record* job_ptr, - char **linuximage, - char **mloaderimage, char **ramdiskimage) + static int _check_images(struct job_record* job_ptr, + char **linuximage, + char **mloaderimage, char **ramdiskimage) #endif { int allow = 0; #ifdef HAVE_BGL select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_BLRTS_IMAGE, blrtsimage); + SELECT_JOBDATA_BLRTS_IMAGE, blrtsimage); if (*blrtsimage) { allow = _test_image_perms(*blrtsimage, bg_conf->blrts_list, @@ -229,7 +229,7 @@ static int _check_images(struct job_record* job_ptr, } #endif select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_LINUX_IMAGE, linuximage); + SELECT_JOBDATA_LINUX_IMAGE, linuximage); if (*linuximage) { allow = _test_image_perms(*linuximage, bg_conf->linux_list, job_ptr); @@ -241,7 +241,7 @@ static int _check_images(struct job_record* job_ptr, } select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_MLOADER_IMAGE, mloaderimage); + SELECT_JOBDATA_MLOADER_IMAGE, mloaderimage); if (*mloaderimage) { allow = _test_image_perms(*mloaderimage, 
bg_conf->mloader_list, @@ -256,7 +256,7 @@ static int _check_images(struct job_record* job_ptr, } select_g_select_jobinfo_get(job_ptr->select_jobinfo, - SELECT_JOBDATA_RAMDISK_IMAGE, ramdiskimage); + SELECT_JOBDATA_RAMDISK_IMAGE, ramdiskimage); if (*ramdiskimage) { allow = _test_image_perms(*ramdiskimage, bg_conf->ramdisk_list, @@ -287,9 +287,11 @@ static bg_record_t *_find_matching_block(List block_list, ListIterator itr = NULL; char tmp_char[256]; - debug("number of blocks to check: %d state %d", - list_count(block_list), - query_mode); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("number of blocks to check: %d state %d " + "asking for %u-%u cpus", + list_count(block_list), + query_mode, request->procs, max_cpus); itr = list_iterator_create(block_list); while ((bg_record = list_next(itr))) { @@ -303,41 +305,45 @@ static bg_record_t *_find_matching_block(List block_list, if(bg_record->job_ptr) bg_record->job_running = bg_record->job_ptr->job_id; - debug3("%s job_running = %d", - bg_record->bg_block_id, bg_record->job_running); + if((bg_record->job_running != NO_JOB_RUNNING) + && (bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK)) + info("%s job_running = %d", + bg_record->bg_block_id, bg_record->job_running); /*block is messed up some how (BLOCK_ERROR_STATE) * ignore it or if state == RM_PARTITION_ERROR */ if((bg_record->job_running == BLOCK_ERROR_STATE) || (bg_record->state == RM_PARTITION_ERROR)) { - debug("block %s is in an error state (can't use)", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("block %s is in an error " + "state (can't use)", + bg_record->bg_block_id); continue; } else if((bg_record->job_running != NO_JOB_RUNNING) && (bg_record->job_running != job_ptr->job_id) && (bg_conf->layout_mode == LAYOUT_DYNAMIC || (SELECT_IS_MODE_RUN_NOW(query_mode) && bg_conf->layout_mode != LAYOUT_DYNAMIC))) { - debug("block %s in use by %s job %d", - bg_record->bg_block_id, - bg_record->user_name, - 
bg_record->job_running); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("block %s in use by %s job %d", + bg_record->bg_block_id, + bg_record->user_name, + bg_record->job_running); continue; } /* Check processor count */ - debug3("asking for %u-%u looking at %d", - request->procs, max_cpus, bg_record->cpu_cnt); if ((bg_record->cpu_cnt < request->procs) || ((max_cpus != NO_VAL) && (bg_record->cpu_cnt > max_cpus))) { /* We use the proccessor count per block here mostly to see if we can run on a smaller block. - */ + */ convert_num_unit((float)bg_record->cpu_cnt, tmp_char, sizeof(tmp_char), UNIT_NONE); - debug("block %s CPU count (%s) not suitable", - bg_record->bg_block_id, - tmp_char); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("block %s CPU count (%s) not suitable", + bg_record->bg_block_id, + tmp_char); continue; } @@ -349,8 +355,10 @@ static bg_record_t *_find_matching_block(List block_list, * SLURM block not available to this job. */ if (!bit_super_set(bg_record->bitmap, slurm_block_bitmap)) { - debug("bg block %s has nodes not usable by this job", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("bg block %s has nodes not " + "usable by this job", + bg_record->bg_block_id); continue; } @@ -360,8 +368,9 @@ static bg_record_t *_find_matching_block(List block_list, if (job_ptr->details->req_node_bitmap && (!bit_super_set(job_ptr->details->req_node_bitmap, bg_record->bitmap))) { - debug("bg block %s lacks required nodes", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("bg block %s lacks required nodes", + bg_record->bg_block_id); continue; } @@ -430,15 +439,16 @@ static bg_record_t *_find_matching_block(List block_list, goto good_conn_type; } #endif - debug("bg block %s conn-type not usable asking for %s " - "bg_record is %s", - bg_record->bg_block_id, - conn_type_string(request->conn_type), - conn_type_string(bg_record->conn_type)); + 
if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("bg block %s conn-type not usable " + "asking for %s bg_record is %s", + bg_record->bg_block_id, + conn_type_string(request->conn_type), + conn_type_string(bg_record->conn_type)); continue; } #ifndef HAVE_BGL - good_conn_type: + good_conn_type: #endif /*****************************************/ /* match up geometry as "best" possible */ @@ -468,7 +478,8 @@ static bg_record_t *_find_matching_block(List block_list, if (!match) continue; /* Not usable */ } - debug2("we found one! %s", bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("we found one! %s", bg_record->bg_block_id); break; } list_iterator_destroy(itr); @@ -487,7 +498,7 @@ static int _check_for_booted_overlapping_blocks( int overlap = 0; bool is_test = SELECT_IS_TEST(query_mode); - /* this test only is for actually picking a block not testing */ + /* this test only is for actually picking a block not testing */ if(is_test && bg_conf->layout_mode == LAYOUT_DYNAMIC) return rc; @@ -498,9 +509,10 @@ static int _check_for_booted_overlapping_blocks( while ((found_record = (bg_record_t*)list_next(itr)) != NULL) { if ((!found_record->bg_block_id) || (bg_record == found_record)) { - debug4("Don't need to look at myself %s %s", - bg_record->bg_block_id, - found_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("Don't need to look at myself %s %s", + bg_record->bg_block_id, + found_record->bg_block_id); continue; } @@ -520,11 +532,13 @@ static int _check_for_booted_overlapping_blocks( if(is_test && overlapped_list && found_record->job_ptr && bg_record->job_running == NO_JOB_RUNNING) { - debug2("found over lapping block %s " - "overlapped %s with job %u", - found_record->bg_block_id, - bg_record->bg_block_id, - found_record->job_ptr->job_id); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("found over lapping block %s " + "overlapped %s with job %u", + found_record->bg_block_id, 
+ bg_record->bg_block_id, + found_record->job_ptr->job_id); ListIterator itr = list_iterator_create( overlapped_list); bg_record_t *tmp_rec = NULL; @@ -579,49 +593,63 @@ static int _check_for_booted_overlapping_blocks( bg_record->bg_block_id, found_record->bg_block_id); else - debug("can't use %s, there is " - "a job (%d) running on " - "an overlapping " - "block %s", - bg_record->bg_block_id, - found_record->job_running, - found_record->bg_block_id); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("can't use %s, there is " + "a job (%d) running on " + "an overlapping " + "block %s", + bg_record->bg_block_id, + found_record->job_running, + found_record->bg_block_id); if(bg_conf->layout_mode == LAYOUT_DYNAMIC) { List temp_list = list_create(NULL); /* this will remove and * destroy the memory for * bg_record - */ + */ list_remove(bg_record_itr); slurm_mutex_lock(&block_state_mutex); if(bg_record->original) { - debug3("This was a copy"); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("This was a copy"); found_record = bg_record->original; remove_from_bg_list( bg_lists->main, found_record); } else { - debug("looking for original"); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("looking for " + "original"); found_record = find_and_remove_org_from_bg_list( bg_lists->main, bg_record); } - debug("Removing unusable block %s " - "from the system.", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("Removing unusable block " + "%s from the system.", + bg_record->bg_block_id); if(!found_record) { - debug("This record %s wasn't " - "found in the " - "bg_lists->main, " - "no big deal, it " - "probably wasn't added", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("This record %s " + "wasn't found in " + "the " + "bg_lists->main, " + "no big deal, it " + "probably wasn't " + "added", + bg_record-> + bg_block_id); found_record = bg_record; } else 
destroy_bg_record(bg_record); @@ -663,7 +691,8 @@ static int _dynamically_request(List block_list, int *blocks_added, int start_geo[SYSTEM_DIMENSIONS]; memcpy(start_geo, request->geometry, sizeof(int)*SYSTEM_DIMENSIONS); - debug2("going to create %d", request->size); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("going to create %d", request->size); list_of_lists = list_create(NULL); if(user_req_nodes) @@ -691,11 +720,13 @@ static int _dynamically_request(List block_list, int *blocks_added, 3- see if we can create one in the non job running bps */ - debug("trying with %d", create_try); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("trying with %d", create_try); if((new_blocks = create_dynamic_block(block_list, request, temp_list, true))) { bg_record_t *bg_record = NULL; + while((bg_record = list_pop(new_blocks))) { if(block_exist_in_list(block_list, bg_record)) destroy_bg_record(bg_record); @@ -800,8 +831,9 @@ static int _find_best_block_match(List block_list, } if(!is_test && (req_procs > avail_cpus)) { - debug2("asking for %u I only got %d", - req_procs, avail_cpus); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("asking for %u I only got %d", + req_procs, avail_cpus); return SLURM_ERROR; } @@ -832,12 +864,12 @@ static int _find_best_block_match(List block_list, for (i=0; i<SYSTEM_DIMENSIONS; i++) target_size *= (uint16_t)req_geometry[i]; if(target_size != min_nodes) { - debug2("min_nodes not set correctly %u should be %u " - "from %u%u%u", - min_nodes, target_size, - req_geometry[X], - req_geometry[Y], - req_geometry[Z]); + debug2("min_nodes not set correctly %u " + "should be %u from %u%u%u", + min_nodes, target_size, + req_geometry[X], + req_geometry[Y], + req_geometry[Z]); min_nodes = target_size; } if(!req_nodes) @@ -947,8 +979,7 @@ static int _find_best_block_match(List block_list, format_node_name(bg_record, tmp_char, sizeof(tmp_char)); debug("_find_best_block_match %s <%s>", - bg_record->bg_block_id, - 
tmp_char); + bg_record->bg_block_id, tmp_char); bit_and(slurm_block_bitmap, bg_record->bitmap); rc = SLURM_SUCCESS; *found_bg_record = bg_record; @@ -994,7 +1025,8 @@ static int _find_best_block_match(List block_list, List new_blocks = NULL; List job_list = list_create(NULL); ListIterator itr = NULL; - debug("trying with empty machine"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("trying with empty machine"); /* Here we need to make sure the blocks in the job list are those in the block list so go @@ -1035,15 +1067,21 @@ static int _find_best_block_match(List block_list, if((bg_record = list_pop(job_list))) { if(bg_record->job_ptr) { - debug2("taking off %d(%s) " - "started at %d " - "ends at %d", - bg_record->job_running, - bg_record->bg_block_id, - bg_record->job_ptr-> - start_time, - bg_record->job_ptr-> - end_time); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("taking off " + "%d(%s) started " + "at %d ends at %d", + bg_record-> + job_running, + bg_record-> + bg_block_id, + bg_record-> + job_ptr-> + start_time, + bg_record-> + job_ptr-> + end_time); /* Mark the block as not running a job, this should @@ -1057,12 +1095,14 @@ static int _find_best_block_match(List block_list, */ bg_record->job_running = NO_JOB_RUNNING; - } else if(bg_record->job_running + } else if((bg_record->job_running == BLOCK_ERROR_STATE) - debug2("taking off (%s) " - "which is in an error " - "state", - bg_record->bg_block_id); + && (bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK)) + info("taking off (%s) " + "which is in an " + "error state", + bg_record->bg_block_id); } else /* This means we didn't have any jobs to take off @@ -1098,7 +1138,9 @@ static int _find_best_block_match(List block_list, */ (*found_bg_record) = list_pop(new_blocks); if(!(*found_bg_record)) { - error("got an empty list back"); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + error("got an empty list back"); list_destroy(new_blocks); if(bg_record) continue; @@ -1169,7 
+1211,9 @@ static int _sync_block_lists(List full_list, List incomp_list) if(!bg_record) { list_remove(itr); - debug4("adding %s", new_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("adding %s %p", + new_record->bg_block_id, (void *)new_record); list_append(incomp_list, new_record); count++; } @@ -1422,6 +1466,10 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, if(bg_conf->layout_mode == LAYOUT_DYNAMIC) slurm_mutex_lock(&create_dynamic_mutex); + slurm_mutex_lock(&block_state_mutex); + block_list = copy_bg_list(bg_lists->main); + slurm_mutex_unlock(&block_state_mutex); + select_g_select_jobinfo_get(job_ptr->select_jobinfo, SELECT_JOBDATA_CONN_TYPE, &conn_type); if(conn_type == SELECT_NAV) { @@ -1456,32 +1504,29 @@ extern int submit_job(struct job_record *job_ptr, bitstr_t *slurm_block_bitmap, buf, sizeof(buf), SELECT_PRINT_BLRTS_IMAGE); #ifdef HAVE_BGL - debug2("BlrtsImage=%s", buf); + debug3("BlrtsImage=%s", buf); select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_LINUX_IMAGE); #endif #ifdef HAVE_BGL - debug2("LinuxImage=%s", buf); + debug3("LinuxImage=%s", buf); #else - debug2("ComputNodeImage=%s", buf); + debug3("ComputNodeImage=%s", buf); #endif select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_MLOADER_IMAGE); - debug2("MloaderImage=%s", buf); + debug3("MloaderImage=%s", buf); select_g_select_jobinfo_sprint(job_ptr->select_jobinfo, buf, sizeof(buf), SELECT_PRINT_RAMDISK_IMAGE); #ifdef HAVE_BGL - debug2("RamDiskImage=%s", buf); + debug3("RamDiskImage=%s", buf); #else - debug2("RamDiskIoLoadImage=%s", buf); + debug3("RamDiskIoLoadImage=%s", buf); #endif - slurm_mutex_lock(&block_state_mutex); - block_list = copy_bg_list(bg_lists->main); - slurm_mutex_unlock(&block_state_mutex); /* First look at the empty space, and then remove the preemptable jobs and try again.
*/ @@ -1521,10 +1566,10 @@ preempt: SELECT_JOBDATA_IONODES, bg_record->ionodes); if(!bg_record->bg_block_id) { - debug2("%d can start unassigned job %u at " - "%u on %s", - local_mode, job_ptr->job_id, starttime, - bg_record->nodes); + debug("%d can start unassigned job %u " + "at %u on %s", + local_mode, job_ptr->job_id, + starttime, bg_record->nodes); select_g_select_jobinfo_set( job_ptr->select_jobinfo, @@ -1540,10 +1585,10 @@ preempt: error("Small block used in " "non-shared partition"); - debug2("%d can start job %u at %u on %s(%s)", - local_mode, job_ptr->job_id, starttime, - bg_record->bg_block_id, - bg_record->nodes); + debug("%d can start job %u at %u on %s(%s)", + local_mode, job_ptr->job_id, + starttime, bg_record->bg_block_id, + bg_record->nodes); if (SELECT_IS_MODE_RUN_NOW(local_mode)) { select_g_select_jobinfo_set( @@ -1556,8 +1601,15 @@ preempt: /* SELECT_JOBDATA_CONN_TYPE, */ /* &conn_type); */ if(job_ptr) { - job_ptr->job_state - |= JOB_CONFIGURING; + info("setting it up here %s %p", + bg_record->bg_block_id, + (void *)bg_record); + bg_record->job_running = + job_ptr->job_id; + bg_record->job_ptr = job_ptr; + + job_ptr->job_state |= + JOB_CONFIGURING; last_job_update = time(NULL); } } else @@ -1593,8 +1645,10 @@ preempt: * lists. But we don't want to do * that so we will set blocks_added to * 0 so it doesn't happen.
*/ - if(!blocks_added) + if(!blocks_added) { destroy_bg_record(bg_record); + bg_record = NULL; + } blocks_added = 0; } } else { diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c index ec3075c3a36..9ed9247c0cb 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -77,13 +77,13 @@ extern void print_bg_record(bg_record_t* bg_record) info("\tbitmap: %s", bitstring); } #else -{ - char tmp_char[256]; - format_node_name(bg_record, tmp_char, sizeof(tmp_char)); - info("Record: BlockID:%s Nodes:%s Conn:%s", - bg_record->bg_block_id, tmp_char, - conn_type_string(bg_record->conn_type)); -} + { + char tmp_char[256]; + format_node_name(bg_record, tmp_char, sizeof(tmp_char)); + info("Record: BlockID:%s Nodes:%s Conn:%s", + bg_record->bg_block_id, tmp_char, + conn_type_string(bg_record->conn_type)); + } #endif } @@ -129,15 +129,15 @@ extern int block_exist_in_list(List my_list, bg_record_t *bg_record) found_record->ionode_bitmap)) { if(bg_record->ionodes) debug("This block %s[%s] " - "is already in the list %s", - bg_record->nodes, - bg_record->ionodes, - found_record->bg_block_id); + "is already in the list %s", + bg_record->nodes, + bg_record->ionodes, + found_record->bg_block_id); else debug("This block %s " - "is already in the list %s", - bg_record->nodes, - found_record->bg_block_id); + "is already in the list %s", + bg_record->nodes, + found_record->bg_block_id); rc = 1; break; @@ -521,7 +521,7 @@ extern int bg_record_sort_aval_inc(bg_record_t* rec_a, bg_record_t* rec_b) && (rec_b->job_running != BLOCK_ERROR_STATE)) return 1; else if((rec_a->job_running != BLOCK_ERROR_STATE) - && (rec_b->job_running == BLOCK_ERROR_STATE)) + && (rec_b->job_running == BLOCK_ERROR_STATE)) return -1; else if(!rec_a->job_ptr && rec_b->job_ptr) return -1; @@ -750,15 +750,17 @@ extern int add_bg_record(List records, List used_nodes, 
blockreq_t *blockreq, /* bg_record->boot_state = 0; Implicit */ /* bg_record->state = 0; Implicit */ #ifdef HAVE_BGL - debug2("add_bg_record: asking for %s %d %d %s", - blockreq->block, blockreq->small32, blockreq->small128, - conn_type_string(blockreq->conn_type)); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("add_bg_record: asking for %s %d %d %s", + blockreq->block, blockreq->small32, blockreq->small128, + conn_type_string(blockreq->conn_type)); #else - debug2("add_bg_record: asking for %s %d %d %d %d %d %s", - blockreq->block, blockreq->small256, - blockreq->small128, blockreq->small64, - blockreq->small32, blockreq->small16, - conn_type_string(blockreq->conn_type)); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("add_bg_record: asking for %s %d %d %d %d %d %s", + blockreq->block, blockreq->small256, + blockreq->small128, blockreq->small64, + blockreq->small32, blockreq->small16, + conn_type_string(blockreq->conn_type)); #endif /* Set the bitmap blank here if it is a full node we don't want anything set we also don't want the bg_record->ionodes set. 
@@ -779,7 +781,7 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq, len += strlen(bg_conf->slurm_node_prefix)+1; bg_record->nodes = xmalloc(len); snprintf(bg_record->nodes, len, "%s%s", - bg_conf->slurm_node_prefix, blockreq->block+i); + bg_conf->slurm_node_prefix, blockreq->block+i); } else fatal("add_bg_record: BPs=%s is in a weird format", blockreq->block); @@ -833,7 +835,8 @@ extern int add_bg_record(List records, List used_nodes, blockreq_t *blockreq, } else { List bg_block_list = NULL; - debug("add_bg_record: adding a small block"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("add_bg_record: adding a small block"); if(no_check) goto no_check; /* if the ionode cnt for small32 is 0 then don't @@ -1040,8 +1043,8 @@ extern int format_node_name(bg_record_t *bg_record, char *buf, int buf_size) { if(bg_record->ionodes) { snprintf(buf, buf_size, "%s[%s]", - bg_record->nodes, - bg_record->ionodes); + bg_record->nodes, + bg_record->ionodes); } else { snprintf(buf, buf_size, "%s", bg_record->nodes); } @@ -1622,7 +1625,7 @@ static int _addto_node_list(bg_record_t *bg_record, int *start, int *end) start[X], start[Y], start[Z]); } if ((end[X] >= DIM_SIZE[X]) || (end[Y] >= DIM_SIZE[Y]) - || (end[Z] >= DIM_SIZE[Z])) { + || (end[Z] >= DIM_SIZE[Z])) { fatal("bluegene.conf matrix size exceeds space defined in " "slurm.conf %c%c%cx%d%d%d => %c%c%c", alpha_num[start[X]], alpha_num[start[Y]], diff --git a/src/plugins/select/bluegene/plugin/bg_switch_connections.c b/src/plugins/select/bluegene/plugin/bg_switch_connections.c index 69d814f603d..342f3f5756f 100644 --- a/src/plugins/select/bluegene/plugin/bg_switch_connections.c +++ b/src/plugins/select/bluegene/plugin/bg_switch_connections.c @@ -222,7 +222,9 @@ static int _add_switch_conns(rm_switch_t* curr_switch, break; } conn[i].part_state = RM_PARTITION_READY; - debug3("adding %d -> %d", source, ba_conn->port_tar); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("adding 
%d -> %d", source, + ba_conn->port_tar); list_push(conn_list, &conn[i]); } } @@ -237,7 +239,8 @@ static int _add_switch_conns(rm_switch_t* curr_switch, return SLURM_ERROR; } } else { - debug2("we got a switch with no connections"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("we got a switch with no connections"); list_destroy(conn_list); return SLURM_ERROR; } @@ -288,12 +291,14 @@ static int _used_switches(ba_node_t* ba_node) int i = 0, j = 0, switch_count = 0; int source = 0; - debug5("checking node %c%c%c", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("checking node %c%c%c", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); for(i=0; i<SYSTEM_DIMENSIONS; i++) { - debug5("dim %d", i); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("dim %d", i); ba_switch = &ba_node->axis_switch[i]; for(j=0; j<num_connections; j++) { /* set the source port(-) to check */ @@ -315,7 +320,9 @@ static int _used_switches(ba_node_t* ba_node) ba_conn = &ba_switch->int_wire[source]; if(ba_conn->used && ba_conn->port_tar != source) { switch_count++; - debug5("used"); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_WIRES) + info("used"); break; } } @@ -631,7 +638,8 @@ cleanup: return SLURM_ERROR; } #endif - debug2("making the small block"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("making the small block"); return rc; } @@ -688,8 +696,10 @@ extern int configure_block_switches(bg_record_t * bg_record) goto cleanup; } #endif - debug4("BP count %d", bg_record->bp_count); - debug4("switch count %d", bg_record->switch_count); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("BP count %d", bg_record->bp_count); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("switch count %d", bg_record->switch_count); list_iterator_reset(itr); while ((ba_node = 
list_next(itr))) { @@ -701,16 +711,18 @@ extern int configure_block_switches(bg_record_t * bg_record) } #endif if(!ba_node->used) { - debug4("%c%c%c is a passthrough, " - "not including in request", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("%c%c%c is a passthrough, " + "not including in request", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); } else { - debug3("using node %c%c%c", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_WIRES) + info("using node %c%c%c", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]]); #ifdef HAVE_BG_FILES if (first_bp){ if ((rc = bridge_set_data(bg_record->bg_block, @@ -758,7 +770,9 @@ extern int configure_block_switches(bg_record_t * bg_record) if(_add_switch_conns(coord_switch[i], &ba_node->axis_switch[i]) == SLURM_SUCCESS) { - debug3("adding switch dim %d", i); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_WIRES) + info("adding switch dim %d", i); if (first_switch){ if ((rc = bridge_set_data( bg_record->bg_block, diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index c3339270d24..769861ae684 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -95,6 +95,7 @@ extern int init_bg(void) xassert(slurmctld_conf.node_prefix); bg_conf->slurm_user_name = xstrdup(slurmctld_conf.slurm_user_name); bg_conf->slurm_node_prefix = xstrdup(slurmctld_conf.node_prefix); + bg_conf->slurm_debug_flags = slurmctld_conf.debug_flags; slurm_conf_unlock(); #ifdef HAVE_BGL @@ -198,7 +199,8 @@ extern int remove_all_users(char *bg_block_id, char *user_name) returnc = REMOVE_USER_ERR; user_count = 0; } else - debug2("got %d users for %s", 
user_count, bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("got %d users for %s", user_count, bg_block_id); for(i=0; i<user_count; i++) { if(i) { if ((rc = bridge_get_data(block_ptr, @@ -263,9 +265,10 @@ extern int remove_all_users(char *bg_block_id, char *user_name) extern int set_block_user(bg_record_t *bg_record) { int rc = 0; - debug("resetting the boot state flag and " - "counter for block %s.", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("resetting the boot state flag and " + "counter for block %s.", + bg_record->bg_block_id); bg_record->boot_state = 0; bg_record->boot_count = 0; @@ -408,7 +411,9 @@ extern bg_record_t *find_and_remove_org_from_bg_list(List my_list, if(!strcmp(bg_record->bg_block_id, found_record->bg_block_id)) { list_remove(itr); - debug2("got the block"); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_SELECT_TYPE) + info("got the block"); break; } } @@ -432,7 +437,9 @@ extern bg_record_t *find_org_in_bg_list(List my_list, bg_record_t *bg_record) if(!strcmp(bg_record->bg_block_id, found_record->bg_block_id)) { - debug2("got the block"); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_SELECT_TYPE) + info("got the block"); break; } } @@ -466,7 +473,9 @@ extern int bg_free_block(bg_record_t *bg_record, bool wait, bool locked) if (bg_record->state != NO_VAL && bg_record->state != RM_PARTITION_FREE && bg_record->state != RM_PARTITION_DEALLOCATING) { - debug2("bridge_destroy %s", bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("bridge_destroy %s", + bg_record->bg_block_id); #ifdef HAVE_BG_FILES rc = bridge_destroy_block(bg_record->bg_block_id); if (rc != STATUS_OK) { @@ -486,11 +495,13 @@ extern int bg_free_block(bg_record_t *bg_record, bool wait, bool locked) == RM_PARTITION_ERROR) break; #endif - debug2("bridge_destroy_partition" - "(%s): %s State = %d", - bg_record->bg_block_id, - bg_err_str(rc), - bg_record->state); + 
if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_SELECT_TYPE) + info("bridge_destroy_partition" + "(%s): %s State = %d", + bg_record->bg_block_id, + bg_err_str(rc), + bg_record->state); } else { error("bridge_destroy_partition" "(%s): %s State = %d", @@ -553,9 +564,11 @@ extern void *mult_free_block(void *args) bg_record->job_running); term_jobs_on_block(bg_record->bg_block_id); } - debug("freeing the block %s.", bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("freeing the block %s.", bg_record->bg_block_id); bg_free_block(bg_record, 1, 0); - debug("done\n"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("done\n"); slurm_mutex_lock(&freed_cnt_mutex); num_block_freed++; slurm_mutex_unlock(&freed_cnt_mutex); @@ -613,22 +626,27 @@ extern void *mult_destroy_block(void *args) num_unused_cpus += bg_record->cpu_cnt; } slurm_mutex_unlock(&block_state_mutex); - debug3("removing the jobs on block %s\n", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("removing the jobs on block %s\n", + bg_record->bg_block_id); term_jobs_on_block(bg_record->bg_block_id); - debug2("destroying %s", (char *)bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("destroying %s", (char *)bg_record->bg_block_id); if(bg_free_block(bg_record, 1, 0) == SLURM_ERROR) { debug("there was an error"); goto already_here; } - debug2("done destroying"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("done destroying"); slurm_mutex_lock(&block_state_mutex); remove_from_bg_list(bg_lists->freeing, bg_record); slurm_mutex_unlock(&block_state_mutex); #ifdef HAVE_BG_FILES - debug2("removing %s from database", bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("removing %s from database", + bg_record->bg_block_id); rc = bridge_remove_block(bg_record->bg_block_id); if (rc != STATUS_OK) { @@ -641,14 +659,16 @@ extern void 
*mult_destroy_block(void *args) bg_err_str(rc)); } } else - debug2("done %s", - (char *)bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("done %s", + (char *)bg_record->bg_block_id); #endif slurm_mutex_lock(&block_state_mutex); destroy_bg_record(bg_record); slurm_mutex_unlock(&block_state_mutex); last_bg_update = time(NULL); - debug2("destroyed"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("destroyed"); already_here: slurm_mutex_lock(&freed_cnt_mutex); @@ -701,7 +721,9 @@ extern int free_block_list(List delete_list) while ((found_record = (bg_record_t*)list_pop(delete_list)) != NULL) { /* push job onto queue in a FIFO */ - debug3("adding %s to be freed", found_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("adding %s to be freed", + found_record->bg_block_id); if(!block_ptr_exist_in_list(*block_list, found_record)) { num_block_to_free++; if (list_push(*block_list, found_record) == NULL) @@ -775,7 +797,8 @@ extern int read_bg_conf(void) ListIterator itr = NULL; char* bg_conf_file = NULL; - debug("Reading the bluegene.conf file"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("Reading the bluegene.conf file"); /* check if config file has changed */ bg_conf_file = _get_bg_conf(); @@ -784,10 +807,12 @@ extern int read_bg_conf(void) fatal("can't stat bluegene.conf file %s: %m", bg_conf_file); if (last_config_update) { _reopen_bridge_log(); - if(last_config_update == config_stat.st_mtime) - debug("%s unchanged", bg_conf_file); - else { - info("Restart slurmctld for %s changes to take effect", + if(last_config_update == config_stat.st_mtime) { + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("%s unchanged", bg_conf_file); + } else { + info("Restart slurmctld for %s changes " + "to take effect", bg_conf_file); } last_config_update = config_stat.st_mtime; @@ -826,7 +851,9 @@ extern int read_bg_conf(void) "If this isn't correct please set 
BlrtsImage", bg_conf->default_blrtsimage); } else { - debug3("default BlrtsImage %s", bg_conf->default_blrtsimage); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("default BlrtsImage %s", + bg_conf->default_blrtsimage); image = xmalloc(sizeof(image_t)); image->name = xstrdup(bg_conf->default_blrtsimage); image->def = true; @@ -855,7 +882,9 @@ extern int read_bg_conf(void) "If this isn't correct please set LinuxImage", bg_conf->default_linuximage); } else { - debug3("default LinuxImage %s", bg_conf->default_linuximage); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("default LinuxImage %s", + bg_conf->default_linuximage); image = xmalloc(sizeof(image_t)); image->name = xstrdup(bg_conf->default_linuximage); image->def = true; @@ -885,8 +914,9 @@ extern int read_bg_conf(void) "If this isn't correct please set RamDiskImage", bg_conf->default_ramdiskimage); } else { - debug3("default RamDiskImage %s", - bg_conf->default_ramdiskimage); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("default RamDiskImage %s", + bg_conf->default_ramdiskimage); image = xmalloc(sizeof(image_t)); image->name = xstrdup(bg_conf->default_ramdiskimage); image->def = true; @@ -916,7 +946,9 @@ extern int read_bg_conf(void) "If this isn't correct please set CnloadImage", bg_conf->default_linuximage); } else { - debug3("default CnloadImage %s", bg_conf->default_linuximage); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("default CnloadImage %s", + bg_conf->default_linuximage); image = xmalloc(sizeof(image_t)); image->name = xstrdup(bg_conf->default_linuximage); image->def = true; @@ -946,7 +978,9 @@ extern int read_bg_conf(void) "If this isn't correct please set IoloadImage", bg_conf->default_ramdiskimage); } else { - debug3("default IoloadImage %s", bg_conf->default_ramdiskimage); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("default IoloadImage %s", + bg_conf->default_ramdiskimage); image = 
xmalloc(sizeof(image_t)); image->name = xstrdup(bg_conf->default_ramdiskimage); image->def = true; @@ -977,8 +1011,9 @@ extern int read_bg_conf(void) "If this isn't correct please set MloaderImage", bg_conf->default_mloaderimage); } else { - debug3("default MloaderImage %s", - bg_conf->default_mloaderimage); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("default MloaderImage %s", + bg_conf->default_mloaderimage); image = xmalloc(sizeof(image_t)); image->name = xstrdup(bg_conf->default_mloaderimage); image->def = true; @@ -1087,8 +1122,9 @@ extern int read_bg_conf(void) bg_conf->smallest_block=512; } #endif - debug("Smallest block possible on this system is %u", - bg_conf->smallest_block); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("Smallest block possible on this system is %u", + bg_conf->smallest_block); /* below we are creating all the possible bitmaps for * each size of small block */ @@ -1273,7 +1309,8 @@ extern int validate_current_blocks(char *dir) last_bg_update = time(NULL); sort_bg_record_inc_size(bg_lists->main); slurm_mutex_unlock(&block_state_mutex); - debug("Blocks have finished being created."); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("Blocks have finished being created."); return SLURM_SUCCESS; } @@ -1654,8 +1691,9 @@ static int _reopen_bridge_log(void) rc = bridge_set_log_params(bg_conf->bridge_api_file, bg_conf->bridge_api_verb); #endif - debug3("Bridge api file set to %s, verbose level %d\n", - bg_conf->bridge_api_file, bg_conf->bridge_api_verb); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_SELECT_TYPE) + info("Bridge api file set to %s, verbose level %d\n", + bg_conf->bridge_api_file, bg_conf->bridge_api_verb); return rc; } diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index 84e71ff4a86..871a854cbf1 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -58,6 
+58,7 @@ typedef struct { uint16_t bp_nodecard_cnt; char *bridge_api_file; uint16_t bridge_api_verb; + uint32_t slurm_debug_flags; #ifdef HAVE_BGL char *default_blrtsimage; #endif diff --git a/src/plugins/select/bluegene/plugin/dynamic_block.c b/src/plugins/select/bluegene/plugin/dynamic_block.c index aeee4c9aed5..eac48d5a3d3 100644 --- a/src/plugins/select/bluegene/plugin/dynamic_block.c +++ b/src/plugins/select/bluegene/plugin/dynamic_block.c @@ -91,33 +91,61 @@ extern List create_dynamic_block(List block_list, bit_or(my_bitmap, bg_record->bitmap); for(i=0; i<SYSTEM_DIMENSIONS; i++) geo[i] = bg_record->geo[i]; - debug2("adding %s %c%c%c %c%c%c", - bg_record->nodes, - alpha_num[bg_record->start[X]], - alpha_num[bg_record->start[Y]], - alpha_num[bg_record->start[Z]], - alpha_num[geo[X]], - alpha_num[geo[Y]], - alpha_num[geo[Z]]); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("adding %s(%s) %s %c%c%c " + "%c%c%c %u", + bg_record->bg_block_id, + bg_record->nodes, + bg_block_state_string( + bg_record->state), + alpha_num[bg_record->start[X]], + alpha_num[bg_record->start[Y]], + alpha_num[bg_record->start[Z]], + alpha_num[geo[X]], + alpha_num[geo[Y]], + alpha_num[geo[Z]], + bg_record->node_cnt); if(check_and_set_node_list( bg_record->bg_block_list) == SLURM_ERROR) { - debug2("something happened in " - "the load of %s", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) + info("something happened in " + "the load of %s", + bg_record->bg_block_id); list_iterator_destroy(itr); FREE_NULL_BITMAP(my_bitmap); rc = SLURM_ERROR; goto finished; } + } else if(bg_conf->slurm_debug_flags + & DEBUG_FLAG_BG_PICK) { + for(i=0; i<SYSTEM_DIMENSIONS; i++) + geo[i] = bg_record->geo[i]; + + info("not adding %s(%s) %s %c%c%c " + "%c%c%c %u", + bg_record->bg_block_id, + bg_record->nodes, + bg_block_state_string( + bg_record->state), + alpha_num[bg_record->start[X]], + alpha_num[bg_record->start[Y]], + alpha_num[bg_record->start[Z]], + 
alpha_num[geo[X]], + alpha_num[geo[Y]], + alpha_num[geo[Z]], + bg_record->node_cnt); } } list_iterator_destroy(itr); FREE_NULL_BITMAP(my_bitmap); } else { reset_ba_system(false); - debug("No list was given"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("No list was given"); } if(request->avail_node_bitmap) { @@ -216,7 +244,8 @@ extern List create_dynamic_block(List block_list, list_sort(block_list, (ListCmpF)bg_record_sort_aval_inc); list_destroy(new_blocks); new_blocks = NULL; - debug2("small block not able to be placed inside others"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("small block not able to be placed inside others"); } if(request->conn_type == SELECT_NAV) @@ -249,9 +278,10 @@ extern List create_dynamic_block(List block_list, if (allocate_block(request, results)) goto setup_records; - debug2("allocate failure for size %d base " - "partitions of free midplanes", - request->size); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("allocate failure for size %d base " + "partitions of free midplanes", + request->size); rc = SLURM_ERROR; if(!list_count(block_list) || !my_block_list) @@ -274,8 +304,9 @@ extern List create_dynamic_block(List block_list, && (bit_ffs(bg_record->ionode_bitmap) != 0)) continue; - debug3("removing %s for request %d", - bg_record->nodes, request->size); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("removing %s for request %d", + bg_record->nodes, request->size); remove_block(bg_record->bg_block_list, (int)NO_VAL, (int)bg_record->conn_type); /* need to set any unusable nodes that this last block @@ -289,8 +320,9 @@ extern List create_dynamic_block(List block_list, if (allocate_block(request, results)) break; - debug2("allocate failure for size %d base partitions", - request->size); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("allocate failure for size %d base partitions", + request->size); rc = SLURM_ERROR; } list_iterator_destroy(itr); @@ -410,8 +442,9 
@@ extern bg_record_t *create_small_record(bg_record_t *bg_record, found_record->ionode_bitmap = bit_copy(ionodes); bit_fmt(bitstring, BITSIZE, found_record->ionode_bitmap); found_record->ionodes = xstrdup(bitstring); - debug4("made small block of %s[%s]", - found_record->nodes, found_record->ionodes); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("made small block of %s[%s]", + found_record->nodes, found_record->ionodes); return found_record; } @@ -586,17 +619,19 @@ static int _split_block(List block_list, List new_blocks, } #ifdef HAVE_BGL - debug2("Asking for %u 32CNBlocks, and %u 128CNBlocks " - "from a %u block, starting at ionode %d.", - blockreq.small32, blockreq.small128, - bg_record->node_cnt, start); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("Asking for %u 32CNBlocks, and %u 128CNBlocks " + "from a %u block, starting at ionode %d.", + blockreq.small32, blockreq.small128, + bg_record->node_cnt, start); #else - debug2("Asking for %u 16CNBlocks, %u 32CNBlocks, " - "%u 64CNBlocks, %u 128CNBlocks, and %u 256CNBlocks" - "from a %u block, starting at ionode %d.", - blockreq.small16, blockreq.small32, - blockreq.small64, blockreq.small128, - blockreq.small256, bg_record->node_cnt, start); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("Asking for %u 16CNBlocks, %u 32CNBlocks, " + "%u 64CNBlocks, %u 128CNBlocks, and %u 256CNBlocks " + "from a %u block, starting at ionode %d.", + blockreq.small16, blockreq.small32, + blockreq.small64, blockreq.small128, + blockreq.small256, bg_record->node_cnt, start); #endif handle_small_record_request(new_blocks, &blockreq, bg_record, start); @@ -617,8 +652,9 @@ static int _breakup_blocks(List block_list, List new_blocks, int cnodes = request->procs / bg_conf->cpu_ratio; int curr_bp_bit = -1; - debug2("cpu_count= %d cnodes=%d o_free=%d o_small=%d", - request->procs, cnodes, only_free, only_small); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("cpu_count= %d cnodes=%d 
o_free=%d o_small=%d", + request->procs, cnodes, only_free, only_small); switch(cnodes) { case 16: @@ -664,15 +700,18 @@ static int _breakup_blocks(List block_list, List new_blocks, if (request->avail_node_bitmap && !bit_super_set(bg_record->bitmap, request->avail_node_bitmap)) { - debug2("bg block %s has nodes not usable by this job", - bg_record->bg_block_id); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("bg block %s has nodes not usable " + "by this job", + bg_record->bg_block_id); continue; } if(bg_record->node_cnt == cnodes) { - debug2("found it here %s, %s", - bg_record->bg_block_id, - bg_record->nodes); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("found it here %s, %s", + bg_record->bg_block_id, + bg_record->nodes); request->save_name = xstrdup_printf( "%c%c%c", alpha_num[bg_record->start[X]], @@ -735,11 +774,12 @@ static int _breakup_blocks(List block_list, List new_blocks, total_cnode_cnt += num_cnodes; bit_fmt(bitstring, BITSIZE, ionodes); - debug2("1 adding %s %s %d got %d set " - "ionodes %s total is %s", - bg_record->bg_block_id, bg_record->nodes, - num_cnodes, total_cnode_cnt, - bg_record->ionodes, bitstring); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("1 adding %s %s %d got %d set " + "ionodes %s total is %s", + bg_record->bg_block_id, bg_record->nodes, + num_cnodes, total_cnode_cnt, + bg_record->ionodes, bitstring); if(total_cnode_cnt == cnodes) { request->save_name = xstrdup_printf( "%c%c%c", @@ -769,10 +809,12 @@ static int _breakup_blocks(List block_list, List new_blocks, bg_record_t *found_record = NULL; if(bg_record->original) { - debug3("This was a copy"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("This was a copy"); found_record = bg_record->original; } else { - debug3("looking for original"); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("looking for original"); found_record = find_org_in_bg_list( bg_lists->main, bg_record); } @@ -784,9 +826,10 @@ static int 
_breakup_blocks(List block_list, List new_blocks, format_node_name(found_record, tmp_char, sizeof(tmp_char)); - debug2("going to split %s, %s", - found_record->bg_block_id, - tmp_char); + if(bg_conf->slurm_debug_flags & DEBUG_FLAG_BG_PICK) + info("going to split %s, %s", + found_record->bg_block_id, + tmp_char); request->save_name = xstrdup_printf( "%c%c%c", alpha_num[found_record->start[X]], diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 04c3df46556..771df1a3911 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1375,6 +1375,8 @@ extern int select_p_reconfigure(void) "want you will need to restart slurm for this " "change to be enforced in the bluegene plugin.", bg_conf->slurm_node_prefix, slurmctld_conf.node_prefix); + bg_conf->slurm_debug_flags = slurmctld_conf.debug_flags; + set_ba_debug_flags(bg_conf->slurm_debug_flags); slurm_conf_unlock(); return SLURM_SUCCESS; -- GitLab