From f648c82c35bbfccfeab38fec34b140bcf41a9817 Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Thu, 2 Apr 2009 20:24:05 +0000 Subject: [PATCH] svn merge -r17122:17124 https://eris.llnl.gov/svn/slurm/branches/slurm-1.3 --- .../select/bluegene/plugin/bg_block_info.c | 2 ++ src/plugins/select/bluegene/plugin/bg_job_run.c | 16 +++++++++++++--- src/plugins/select/bluegene/plugin/state_test.c | 2 +- 3 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c b/src/plugins/select/bluegene/plugin/bg_block_info.c index e5f3bba9972..21bd998f064 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.c +++ b/src/plugins/select/bluegene/plugin/bg_block_info.c @@ -198,6 +198,8 @@ extern int block_ready(struct job_record *job_ptr) xfree(block_id); } else rc = READY_JOB_ERROR; +/* info("returning %d for job %u %d %d", */ +/* rc, job_ptr->job_id, READY_JOB_ERROR, READY_JOB_FATAL); */ return rc; } diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index 9bee222984a..f9a970183db 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -171,18 +171,28 @@ static int _remove_job(db_job_id_t job_id) return STATUS_OK; } - rc = bridge_cancel_job(job_id); + /* we have been told the next 2 lines do the same + * thing, but I don't believe it to be true. In most + * cases when you do a signal of SIGTERM the mpirun + * process gets killed with a SIGTERM. In the case of + * bridge_cancel_job it always gets killed with a + * SIGKILL. From IBM's point of view that is a bad + * deally, so we are going to use signal ;). + */ + +// rc = bridge_cancel_job(job_id); + rc = bridge_signal_job(job_id, SIGTERM); if (rc != STATUS_OK) { if (rc == JOB_NOT_FOUND) { debug("job %d removed from MMCS", job_id); return STATUS_OK; - } + } if(rc == INCOMPATIBLE_STATE) debug("job %d is in an INCOMPATIBLE_STATE", job_id); else - error("bridge_cancel_job(%d): %s", job_id, + error("bridge_cancel_job(%d): %s", job_id, bg_err_str(rc)); } } diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c index 5264caa798c..e481b029329 100644 --- a/src/plugins/select/bluegene/plugin/state_test.c +++ b/src/plugins/select/bluegene/plugin/state_test.c @@ -339,7 +339,7 @@ static void _test_down_nodes(my_bluegene_t *my_bg) int bp_num, i, rc; rm_BP_t *my_bp; - debug("Running _test_down_nodes"); + debug2("Running _test_down_nodes"); if ((rc = bridge_get_data(my_bg, RM_BPNum, &bp_num)) != STATUS_OK) { error("bridge_get_data(RM_BPNum): %s", bg_err_str(rc)); bp_num = 0; -- GitLab