diff --git a/META b/META index d9c132c8d2f83e98ea93d9bb4457ca3c2ecdb5ad..0892d78a8459ac3af609f023299b04da4cba3e89 100644 --- a/META +++ b/META @@ -11,7 +11,7 @@ Minor: 1 Micro: 0 Version: 2.1.0 - Release: 0.pre2 + Release: 0.pre3 ## # When changing API_CURRENT update src/common/slurm_protocol_common.h # with a new SLURM_PROTOCOL_VERSION signifing the old one and the version diff --git a/NEWS b/NEWS index f56bd189b6dbb07e902856bca05991e44a2138ed..139dd49a77c981a37519b969ae7cfa7b2c4805d9 100644 --- a/NEWS +++ b/NEWS @@ -211,6 +211,11 @@ documents those changes that are of interest to users and admins. -- Slurm.pm loads without warning now on AIX systems -- modified pmi code to do strncpy's on the correct len -- Fix for filling in a qos structure to return SLURM_SUCCESS on success. + -- BLUEGENE - Added SLURM_BG_NUM_NODES with cnode count of allocation, + SLURM_JOB_NUM_NODES represents midplane counts until 2.1. + -- BLUEGENE - Added fix for if a block is in error state and the midplane + containing the block is also set to drain/down. This previously + prevented dynamic creation of new blocks when this state was present. 
* Changes in SLURM 2.0.4 ======================== diff --git a/src/common/env.c b/src/common/env.c index 98b6d9cf6637690743cf99de3e1d1cfa7b24b4f6..6bbd90efe86eb04c81547758dfeeae8b6f150376 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -883,7 +883,8 @@ extern char *uint32_compressed_to_str(uint32_t array_len, * SLURM_JOB_NODELIST * SLURM_JOB_CPUS_PER_NODE * LOADLBATCH (AIX only) - * MPIRUN_PARTITION, MPIRUN_NOFREE, and MPIRUN_NOALLOCATE (BGL only) + * SLURM_BG_NUM_NODES, MPIRUN_PARTITION, MPIRUN_NOFREE, and + * MPIRUN_NOALLOCATE (BGL only) * * Sets OBSOLETE variables (needed for MPI, do not remove): * SLURM_JOBID @@ -911,6 +912,7 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, &node_cnt); if(!node_cnt) node_cnt = alloc->node_cnt; + env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", node_cnt); #endif env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", alloc->job_id); @@ -1070,6 +1072,9 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, env_array_overwrite_fmt(dest, "SLURM_JOB_ID", "%u", batch->job_id); env_array_overwrite_fmt(dest, "SLURM_JOB_NUM_NODES", "%u", num_nodes); +#ifdef HAVE_BG + env_array_overwrite_fmt(dest, "SLURM_BG_NUM_NODES", "%u", num_nodes); +#endif env_array_overwrite_fmt(dest, "SLURM_JOB_NODELIST", "%s", batch->nodes); tmp = uint32_compressed_to_str(batch->num_cpu_groups, diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index b262acbba968f00083520f00e061bbf83257200a..3d10468dc1ce82bc2b4e53ab53e882a80d44044a 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -1223,10 +1223,11 @@ extern void ba_update_node_state(ba_node_t *ba_node, uint16_t state) /* basically set the node as used */ if((node_base_state == NODE_STATE_DOWN) - || (ba_node->state & NODE_STATE_DRAIN)) + || (state & 
NODE_STATE_DRAIN)) ba_node->used = true; else ba_node->used = false; + ba_node->state = state; } @@ -1486,14 +1487,26 @@ extern int check_and_set_node_list(List nodes) grid[ba_node->coord[X]] [ba_node->coord[Y]] [ba_node->coord[Z]]; + if(ba_node->used && curr_ba_node->used) { - debug4("I have already been to " - "this node %c%c%c", - alpha_num[ba_node->coord[X]], - alpha_num[ba_node->coord[Y]], - alpha_num[ba_node->coord[Z]]); - rc = SLURM_ERROR; - goto end_it; + /* Only error if the midplane isn't already + * marked down or in a error state outside of + * the bluegene block. + */ + uint16_t base_state, node_flags; + base_state = curr_ba_node->state & NODE_STATE_BASE; + node_flags = curr_ba_node->state & NODE_STATE_FLAGS; + if (!(node_flags & (NODE_STATE_DRAIN | NODE_STATE_FAIL)) + && (base_state != NODE_STATE_DOWN)) { + debug4("I have already been to " + "this node %c%c%c %s", + alpha_num[ba_node->coord[X]], + alpha_num[ba_node->coord[Y]], + alpha_num[ba_node->coord[Z]], + node_state_string(curr_ba_node->state)); + rc = SLURM_ERROR; + goto end_it; + } } if(ba_node->used) @@ -3770,7 +3783,6 @@ requested_end: static bool _node_used(ba_node_t* ba_node, int x_size) { ba_switch_t* ba_switch = NULL; - /* if we've used this node in another block already */ if (!ba_node || ba_node->used) { debug4("node %c%c%c used", diff --git a/src/plugins/select/bluegene/plugin/block_sys.c b/src/plugins/select/bluegene/plugin/block_sys.c index f0898407c52d6255df89366ab00f3c350da2b488..720e55f5a092d624fe19899acefe975348029a12 100755 --- a/src/plugins/select/bluegene/plugin/block_sys.c +++ b/src/plugins/select/bluegene/plugin/block_sys.c @@ -1083,7 +1083,7 @@ extern int load_state_file(List curr_block_list, char *dir_name) #endif slurm_mutex_lock(&block_state_mutex); - reset_ba_system(false); + reset_ba_system(true); /* Locks are already in place to protect part_list here */ bitmap = bit_alloc(node_record_count); diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c 
b/src/plugins/select/bluegene/plugin/select_bluegene.c index f56a7e1d5ca809336de53ba4ce6d9bce361feced..434972322efc526dc98fe008273dd41fc12791d3 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -1047,7 +1047,6 @@ extern int select_p_update_node_config (int index) extern int select_p_update_node_state (int index, uint16_t state) { int x, y, z; - for (y = DIM_SIZE[Y] - 1; y >= 0; y--) { for (z = 0; z < DIM_SIZE[Z]; z++) { for (x = 0; x < DIM_SIZE[X]; x++) { diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 02060e27cd19938031ce6a04601deb6e57da8d72..4ee7a54f8099aa223ac13ebb5a2618a992b8a33c 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -642,6 +642,20 @@ extern int load_all_node_state ( bool state_only ) node_ptr->last_idle = now; } xfree (node_name); + if(node_ptr) { + /* If the state is UNKNOWN we will assume IDLE + until the nodes check in. This is needed + for bluegene to set up drained nodes + correctly. */ + if((node_ptr->node_state & NODE_STATE_BASE) + == NODE_STATE_UNKNOWN) + node_ptr->node_state |= NODE_STATE_IDLE; + + select_g_update_node_state( + (node_ptr - node_record_table_ptr), + node_ptr->node_state); + } + } fini: info("Recovered state of %d nodes", node_cnt);