diff --git a/NEWS b/NEWS index d6503fa2fe95100a8cf0cc5cd8465540110bc1f1..b2271f76c8e2d0edc796326a1c5a2f3f69a27cde 100644 --- a/NEWS +++ b/NEWS @@ -50,6 +50,9 @@ documents those changes that are of interest to users and admins. -- BLUEGENE - If a large block (> 1 midplane) is in error and underlying hardware is marked bad remove the larger block and create a block over just the bad hardware making the other hardware available to run on. + -- BLUEGENE - Handle job completion correctly if an admin removes a block + where other blocks on an overlapping midplane are running jobs. + -- BLUEGENE - Correctly remove running jobs when freeing a block. * Changes in SLURM 2.4.1 ======================== diff --git a/slurm.spec b/slurm.spec index c0b5a00940b8ab4c65a23c9cfa75571e455606a0..33270a93546fee83fdf1c0adb95282bf5cff154a 100644 --- a/slurm.spec +++ b/slurm.spec @@ -10,7 +10,7 @@ # --with auth_none %_with_auth_none 1 build auth-none RPM # --with blcr %_with_blcr 1 require blcr support # --with bluegene %_with_bluegene 1 build bluegene RPM -# --with cray_xt %_with_cray_xt 1 build for Cray XT system +# --with cray %_with_cray 1 build for a Cray system # --with debug %_with_debug 1 enable extra debugging within SLURM # --with lua %_with_lua 1 build SLURM lua bindings (proctrack only for now) # --without munge %_without_munge 1 don't build auth-munge RPM @@ -145,6 +145,11 @@ BuildRequires: mysql-devel >= 5.0.0 BuildRequires: postgresql-devel >= 8.0.0 %endif +%if %{slurm_with cray} +BuildRequires: cray-MySQL-devel-enterprise +Requires: cray-MySQL-devel-enterprise +%endif + %ifnos aix5.3 # FIXME: AIX can't seem to find this even though this is in existance there. 
# We should probably figure out a better way of doing this, but for now we @@ -383,7 +388,6 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart %build %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ - %{?slurm_with_cray_xt:--enable-cray-xt} \ %{?slurm_with_debug:--enable-debug} \ %{?slurm_with_partial_attach:--enable-partial-attach} \ %{?slurm_with_sun_const:--enable-sun-const} \ diff --git a/src/plugins/select/bluegene/bg_core.c b/src/plugins/select/bluegene/bg_core.c index 5a8f18bb361bfd3862a99ac157d1103ca4390567..cf7886624ae11565f336d2d8ccc9a8509e692525 100644 --- a/src/plugins/select/bluegene/bg_core.c +++ b/src/plugins/select/bluegene/bg_core.c @@ -534,7 +534,7 @@ extern int free_block_list(uint32_t job_id, List track_list, itr = list_iterator_create(bg_record->job_list); while ((job_ptr = list_next(itr))) { if ((job_ptr->magic != JOB_MAGIC) - || !IS_JOB_FINISHED(job_ptr)) + || IS_JOB_FINISHED(job_ptr)) continue; freeit = xmalloc(sizeof(kill_job_struct_t)); freeit->jobid = job_ptr->job_id; diff --git a/src/plugins/select/bluegene/select_bluegene.c b/src/plugins/select/bluegene/select_bluegene.c index ef50b77b6d4b06278f80bc48be68423555c07e7d..8945e0ce6a2a61de0a5290f88f34b33d0291495e 100644 --- a/src/plugins/select/bluegene/select_bluegene.c +++ b/src/plugins/select/bluegene/select_bluegene.c @@ -2528,14 +2528,15 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr) info("Pending job %u on block %s " "will try to be requeued " "because overlapping block %s " - "is in an error state.", + "is being removed.", found_record->job_running, found_record->bg_block_id, bg_record->bg_block_id); else - info("Failing job %u on block %s " + info("Running job %u on block %s " + "will try to be requeued " "because overlapping block %s " - "is in an error state.", + "is being removed.", found_record->job_running, found_record->bg_block_id, bg_record->bg_block_id); @@ -2548,7 +2549,7 @@ extern int 
select_p_update_block(update_block_msg_t *block_desc_ptr) struct job_record *job_ptr = NULL; ListIterator job_itr = list_iterator_create( found_record->job_list); - while ((job_ptr = list_next(itr))) { + while ((job_ptr = list_next(job_itr))) { if (job_ptr->magic != JOB_MAGIC) { error("select_p_update_block: " "bad magic found when " @@ -2562,18 +2563,17 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr) info("Pending job %u on " "block %s " "will try to be requeued " - "because overlapping " - "block %s " + "because related block %s " "is in an error state.", job_ptr->job_id, found_record->bg_block_id, bg_record->bg_block_id); else - info("Failing job %u on " - "block %s " - "because overlapping " + info("Running job %u on " "block %s " - "is in an error state.", + "will try to be requeued " + "because related block %s " + "is being removed.", job_ptr->job_id, found_record->bg_block_id, bg_record->bg_block_id);