Skip to content
Snippets Groups Projects
Commit 366f357d authored by Danny Auble's avatar Danny Auble
Browse files

Merge remote-tracking branch 'origin/slurm-2.4'

parents a61cc1b9 a1f9b6a7
No related branches found
No related tags found
No related merge requests found
...@@ -50,6 +50,9 @@ documents those changes that are of interest to users and admins. ...@@ -50,6 +50,9 @@ documents those changes that are of interest to users and admins.
-- BLUEGENE - If a large block (> 1 midplane) is in error and underlying -- BLUEGENE - If a large block (> 1 midplane) is in error and underlying
hardware is marked bad remove the larger block and create a block over hardware is marked bad remove the larger block and create a block over
just the bad hardware making the other hardware available to run on. just the bad hardware making the other hardware available to run on.
-- BLUEGENE - Handle job completion correctly if an admin removes a block
where other blocks on an overlapping midplane are running jobs.
-- BLUEGENE - Correctly remove running jobs when freeing a block.
* Changes in SLURM 2.4.1 * Changes in SLURM 2.4.1
======================== ========================
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
# --with auth_none %_with_auth_none 1 build auth-none RPM # --with auth_none %_with_auth_none 1 build auth-none RPM
# --with blcr %_with_blcr 1 require blcr support # --with blcr %_with_blcr 1 require blcr support
# --with bluegene %_with_bluegene 1 build bluegene RPM # --with bluegene %_with_bluegene 1 build bluegene RPM
# --with cray_xt %_with_cray_xt 1 build for Cray XT system # --with cray %_with_cray 1 build for a Cray system
# --with debug %_with_debug 1 enable extra debugging within SLURM # --with debug %_with_debug 1 enable extra debugging within SLURM
# --with lua %_with_lua 1 build SLURM lua bindings (proctrack only for now) # --with lua %_with_lua 1 build SLURM lua bindings (proctrack only for now)
# --without munge %_without_munge 1 don't build auth-munge RPM # --without munge %_without_munge 1 don't build auth-munge RPM
...@@ -145,6 +145,11 @@ BuildRequires: mysql-devel >= 5.0.0 ...@@ -145,6 +145,11 @@ BuildRequires: mysql-devel >= 5.0.0
BuildRequires: postgresql-devel >= 8.0.0 BuildRequires: postgresql-devel >= 8.0.0
%endif %endif
%if %{slurm_with cray}
BuildRequires: cray-MySQL-devel-enterprise
Requires: cray-MySQL-devel-enterprise
%endif
%ifnos aix5.3 %ifnos aix5.3
# FIXME: AIX can't seem to find this even though this is in existence there. # FIXME: AIX can't seem to find this even though this is in existence there.
# We should probably figure out a better way of doing this, but for now we # We should probably figure out a better way of doing this, but for now we
...@@ -383,7 +388,6 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart ...@@ -383,7 +388,6 @@ Gives the ability for SLURM to use Berkeley Lab Checkpoint/Restart
%build %build
%configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \ %configure --program-prefix=%{?_program_prefix:%{_program_prefix}} \
%{?slurm_with_cray_xt:--enable-cray-xt} \
%{?slurm_with_debug:--enable-debug} \ %{?slurm_with_debug:--enable-debug} \
%{?slurm_with_partial_attach:--enable-partial-attach} \ %{?slurm_with_partial_attach:--enable-partial-attach} \
%{?slurm_with_sun_const:--enable-sun-const} \ %{?slurm_with_sun_const:--enable-sun-const} \
......
...@@ -534,7 +534,7 @@ extern int free_block_list(uint32_t job_id, List track_list, ...@@ -534,7 +534,7 @@ extern int free_block_list(uint32_t job_id, List track_list,
itr = list_iterator_create(bg_record->job_list); itr = list_iterator_create(bg_record->job_list);
while ((job_ptr = list_next(itr))) { while ((job_ptr = list_next(itr))) {
if ((job_ptr->magic != JOB_MAGIC) if ((job_ptr->magic != JOB_MAGIC)
|| !IS_JOB_FINISHED(job_ptr)) || IS_JOB_FINISHED(job_ptr))
continue; continue;
freeit = xmalloc(sizeof(kill_job_struct_t)); freeit = xmalloc(sizeof(kill_job_struct_t));
freeit->jobid = job_ptr->job_id; freeit->jobid = job_ptr->job_id;
......
...@@ -2528,14 +2528,15 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr) ...@@ -2528,14 +2528,15 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr)
info("Pending job %u on block %s " info("Pending job %u on block %s "
"will try to be requeued " "will try to be requeued "
"because overlapping block %s " "because overlapping block %s "
"is in an error state.", "is being removed.",
found_record->job_running, found_record->job_running,
found_record->bg_block_id, found_record->bg_block_id,
bg_record->bg_block_id); bg_record->bg_block_id);
else else
info("Failing job %u on block %s " info("Running job %u on block %s "
"will try to be requeued "
"because overlapping block %s " "because overlapping block %s "
"is in an error state.", "is being removed.",
found_record->job_running, found_record->job_running,
found_record->bg_block_id, found_record->bg_block_id,
bg_record->bg_block_id); bg_record->bg_block_id);
...@@ -2548,7 +2549,7 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr) ...@@ -2548,7 +2549,7 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr)
struct job_record *job_ptr = NULL; struct job_record *job_ptr = NULL;
ListIterator job_itr = list_iterator_create( ListIterator job_itr = list_iterator_create(
found_record->job_list); found_record->job_list);
while ((job_ptr = list_next(itr))) { while ((job_ptr = list_next(job_itr))) {
if (job_ptr->magic != JOB_MAGIC) { if (job_ptr->magic != JOB_MAGIC) {
error("select_p_update_block: " error("select_p_update_block: "
"bad magic found when " "bad magic found when "
...@@ -2562,18 +2563,17 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr) ...@@ -2562,18 +2563,17 @@ extern int select_p_update_block(update_block_msg_t *block_desc_ptr)
info("Pending job %u on " info("Pending job %u on "
"block %s " "block %s "
"will try to be requeued " "will try to be requeued "
"because overlapping " "because related block %s "
"block %s "
"is in an error state.", "is in an error state.",
job_ptr->job_id, job_ptr->job_id,
found_record->bg_block_id, found_record->bg_block_id,
bg_record->bg_block_id); bg_record->bg_block_id);
else else
info("Failing job %u on " info("Running job %u on "
"block %s "
"because overlapping "
"block %s " "block %s "
"is in an error state.", "will try to be requeued "
"because related block %s "
"is being removed.",
job_ptr->job_id, job_ptr->job_id,
found_record->bg_block_id, found_record->bg_block_id,
bg_record->bg_block_id); bg_record->bg_block_id);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment