diff --git a/NEWS b/NEWS index 85d5c5fbfcf6aff4c7ed586c6b86c664e8e036f9..5268a3926d6c15f95543f8d9e07927e72c617b92 100644 --- a/NEWS +++ b/NEWS @@ -96,6 +96,8 @@ documents those changes that are of interest to users and admins. termination time). - fix for step allocation to be able to specify only a few nodes in a step and ask for more that specified. + - patch from Hongjia Cao for forwarding logic + - BLUEGENE - able to allocate specific nodes without locking up. * Changes in SLURM 1.1.13 ========================= diff --git a/src/common/forward.c b/src/common/forward.c index b9d006018c409902543cce811f6270f54c86888d..7a41738a0c4dc19fde2451aa9bac6da00a08f629 100644 --- a/src/common/forward.c +++ b/src/common/forward.c @@ -182,7 +182,7 @@ void *_forward_thread(void *arg) } break; } - + slurm_mutex_lock(fwd_msg->forward_mutex); if(ret_list) { while((ret_data_info = list_pop(ret_list)) != NULL) { diff --git a/src/common/plugstack.c b/src/common/plugstack.c index 536df262ac52d43c58e8be4f42f0aca23f1ed827..8f5c87f6b99bcdd07f4c3e54a4a62df1cbb03d49 100644 --- a/src/common/plugstack.c +++ b/src/common/plugstack.c @@ -349,11 +349,12 @@ _spank_stack_process_line(const char *file, int line, char *buf, } if (!(p = _spank_plugin_create(path, ac, argv, required))) { - error ("spank: %s:%d: Failed to load %s plugin from %s. %s", - file, line, - required ? "required" : "optional", - path, - required ? "Aborting." : "Ignoring."); + if (required) + error ("spank: %s:%d: Failed to load plugin %s. Aborting.", + file, line, path); + else + verbose ("spank: %s:%d: Failed to load optional plugin %s. Ignored.", + file, line, path); return (required ? -1 : 0); } @@ -554,12 +555,15 @@ int spank_init(slurmd_job_t * job) return (-1); } + if (_do_call_stack(SPANK_INIT, job, -1) < 0) + return (-1); + if (job && spank_get_remote_options(job->options) < 0) { error("spank: Unable to get remote options"); return (-1); } - return (_do_call_stack(SPANK_INIT, job, -1)); + return (0); } int spank_user(slurmd_job_t * job) diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 3982bbb57a4afa21c4784b1323a3303ed9aa411d..a5ee2f79e129e62524412906f08b9b7bc5d718dd 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -17,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -147,6 +147,9 @@ static int _find_best_block_match(struct job_record* job_ptr, select_g_get_jobinfo(job_ptr->select_jobinfo, SELECT_DATA_MAX_PROCS, &max_procs); + if(start[X] != (uint16_t)NO_VAL) + start_req = 1; + if(req_geometry[0] != 0 && req_geometry[0] != (uint16_t)NO_VAL) { target_size = 1; for (i=0; i<BA_SYSTEM_DIMENSIONS; i++) @@ -165,7 +168,7 @@ static int _find_best_block_match(struct job_record* job_ptr, } if (target_size == 0) { /* no geometry specified */ if(job_ptr->details->req_nodes - && start[0] == (uint16_t)NO_VAL) { + && !start_req) { bg_record_t *tmp_record = NULL; char *tmp_nodes= job_ptr->details->req_nodes; int len = strlen(tmp_nodes); @@ -450,6 +453,27 @@ try_again: slurm_mutex_lock(&request_list_mutex); itr = list_iterator_create(bg_request_list); while ((try_request = list_next(itr))) { + if(start_req) { + if ((try_request->start[X] != start[X]) + || (try_request->start[Y] != start[Y]) + || (try_request->start[Z] != start[Z])) { + debug4("got %d%d%d looking for %d%d%d", + try_request->start[X], + try_request->start[Y], + try_request->start[Z], + start[X], + start[Y], + start[Z]); + continue; + } + debug3("found %d%d%d looking for %d%d%d", + try_request->start[X], + try_request->start[Y], + try_request->start[Z], + start[X], + start[Y], + start[Z]); + } if(try_request->procs >= req_procs) { debug("already tried to create but " "can't right now."); @@ -465,7 +489,7 @@ try_again: slurm_mutex_unlock(&request_list_mutex); } - if(!found && test_only) { + if(test_only) { for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) request.start[i] = start[i]; @@ -504,6 +528,9 @@ try_again: try_request->procs = req_procs; try_request->save_name = NULL; try_request->elongate_geos = NULL; + try_request->start_req = request.start_req; + for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) + try_request->start[i] = start[i]; slurm_mutex_lock(&request_list_mutex); list_push(bg_request_list, try_request); slurm_mutex_unlock(&request_list_mutex); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index f28d2398695f873575c503771fccd9c8e8465644..02316495be48cbac38ec0124cabdb1703a06832d 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -1098,6 +1098,7 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) } else { num_quarter=4; } + if(_breakup_blocks(request, my_block_list, &block_inx) != SLURM_SUCCESS) { debug2("small block not able to be placed"); @@ -1125,41 +1126,46 @@ extern int create_dynamic_block(ba_request_t *request, List my_block_list) } /*Try to put block starting in the smallest of the exisiting blocks*/ - itr = list_iterator_create(bg_list); - while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) { - request->rotate_count = 0; - request->elongate_count = 1; + if(!request->start_req) { + itr = list_iterator_create(bg_list); + while ((bg_record = (bg_record_t *) list_next(itr)) != NULL) { + request->rotate_count = 0; + request->elongate_count = 1; - if(bg_record->job_running == -1 - && (bg_record->quarter == (uint16_t) NO_VAL - || (bg_record->quarter == 0 - && (bg_record->nodecard == (uint16_t) NO_VAL - || bg_record->nodecard == 0)))) { - - for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) - request->start[i] = bg_record->start[i]; - debug2("allocating %s %d%d%d %d", - bg_record->nodes, - request->start[X], - request->start[Y], - request->start[Z], - request->size); - request->start_req = 1; - rc = SLURM_SUCCESS; - if (!allocate_block(request, NULL)){ - debug2("allocate failure for size %d " - "base partitions", + if(bg_record->job_running == -1 + && (bg_record->quarter == (uint16_t) NO_VAL + || (bg_record->quarter == 0 + && (bg_record->nodecard == (uint16_t) NO_VAL + || bg_record->nodecard == 0)))) { + + for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) + request->start[i] = + bg_record->start[i]; + debug2("allocating %s %d%d%d %d", + bg_record->nodes, + request->start[X], + request->start[Y], + request->start[Z], request->size); - rc = SLURM_ERROR; - } else - break; + request->start_req = 1; + rc = SLURM_SUCCESS; + if (!allocate_block(request, NULL)){ + debug2("allocate failure for size %d " + "base partitions", + request->size); + rc = SLURM_ERROR; + } else + break; + } } + list_iterator_destroy(itr); + + request->start_req = 0; + for(i=0; i<BA_SYSTEM_DIMENSIONS; i++) + request->start[i] = (uint16_t) NO_VAL; } - list_iterator_destroy(itr); - no_list: - if(!bg_record) { - request->start_req = 0; + if(!bg_record) { rc = SLURM_SUCCESS; if (!allocate_block(request, NULL)){ debug("allocate failure for size %d base partitions", @@ -2344,6 +2350,27 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list, continue; if(bg_record->state != RM_PARTITION_FREE) continue; + if(request->start_req) { + if ((request->start[X] != bg_record->start[X]) + || (request->start[Y] != bg_record->start[Y]) + || (request->start[Z] != bg_record->start[Z])) { + debug4("small got %d%d%d looking for %d%d%d", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z], + request->start[X], + request->start[Y], + request->start[Z]); + continue; + } + debug3("small found %d%d%d looking for %d%d%d", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z], + request->start[X], + request->start[Y], + request->start[Z]); + } proc_cnt = bg_record->bp_count * bg_record->cpus_per_bp; if(proc_cnt == request->procs) { @@ -2410,6 +2437,28 @@ static int _breakup_blocks(ba_request_t *request, List my_block_list, != NULL) { if(bg_record->job_running != -1) continue; + if(request->start_req) { + if ((request->start[X] != bg_record->start[X]) + || (request->start[Y] != bg_record->start[Y]) + || (request->start[Z] != bg_record->start[Z])) { + debug4("small 2 got %d%d%d looking for %d%d%d", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z], + request->start[X], + request->start[Y], + request->start[Z]); + continue; + } + debug3("small 2 found %d%d%d looking for %d%d%d", + bg_record->start[X], + bg_record->start[Y], + bg_record->start[Z], + request->start[X], + request->start[Y], + request->start[Z]); + } + proc_cnt = bg_record->bp_count * bg_record->cpus_per_bp; if(proc_cnt == request->procs) { debug2("found it here %s, %s", diff --git a/src/sacct/print.c b/src/sacct/print.c index 8834e4f12fc9ede44228f082cf0ce569e9a73e72..f7f96a642655af7aff328cd9ac1b5865d892eea6 100644 --- a/src/sacct/print.c +++ b/src/sacct/print.c @@ -17,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -217,7 +217,7 @@ void print_idrss(type_t type, void *object) struct rusage rusage; char outbuf[FORMAT_STRING_SIZE]; rusage.ru_idrss = 0; - + switch(type) { case HEADLINE: printf("%8s", "Idrss"); diff --git a/src/smap/configure_functions.c b/src/smap/configure_functions.c index 8b5805326d3ebf49105ddf638292a1d0fa821400..5620cc2c123f53e4e92f429882f04267633b7192 100644 --- a/src/smap/configure_functions.c +++ b/src/smap/configure_functions.c @@ -17,7 +17,7 @@ * any later version. * * In addition, as a special exception, the copyright holders give permission - * to link the code of portions of this program with the OpenSSL library under + * to link the code of portions of this program with the OpenSSL library under * certain conditions as described in each individual source file, and * distribute linked combinations including the two. You must obey the GNU * General Public License in all respects for all of the code used other than @@ -899,6 +899,10 @@ static int _add_bg_record(blockreq_t *blockreq, List allocated_blocks) geo[Y] = 0; geo[Z] = 0; + start1[X] = -1; + start1[Y] = -1; + start1[Z] = -1; + end1[X] = -1; end1[Y] = -1; end1[Z] = -1;