From 78aa6a508b9dbf321da3f0e597a6061b8c337cbc Mon Sep 17 00:00:00 2001 From: Danny Auble <da@llnl.gov> Date: Tue, 16 Jun 2009 18:12:32 +0000 Subject: [PATCH] svn merge -r17853:17864 https://eris.llnl.gov/svn/slurm/branches/slurm-2.0 --- NEWS | 7 ++++-- src/plugins/sched/backfill/backfill.c | 24 +++++++++---------- .../select/bluegene/plugin/bg_job_place.c | 3 --- src/scontrol/scontrol.c | 12 ++++++++-- src/slurmctld/reservation.c | 2 +- src/slurmctld/reservation.h | 4 ++-- 6 files changed, 30 insertions(+), 22 deletions(-) diff --git a/NEWS b/NEWS index aadfb3c97b0..19c4c0dd401 100644 --- a/NEWS +++ b/NEWS @@ -66,7 +66,7 @@ documents those changes that are of interest to users and admins. -- Fix select/linear to properly set a job's count of allocated processors (all processors on the allocated nodes). -- Fix select/cons_res to allocate proper CPU count when --ntasks-per-node - option is used. + option is used without a task count in the job request. -- Insure that no node is allocated to a job for which the CPU count is less than --ntasks-per-node * --cpus-per-task. -- Correct AllocProcs reported by "scontrol show node" when ThreadsPerCore @@ -86,9 +86,12 @@ documents those changes that are of interest to users and admins. -- Make sched/backfill properly schedule jobs with constraints having node counts. NOTE: Backfill of jobs with constraings having exclusive OR operators are not fully supported. - -- if srun is cancelled by SIGINT that job is marked as cancelled. + -- If srun is cancelled by SIGINT, set the job state to cancelled, not + failed. -- BLUEGENE - Fix, for if you are setting an subbp into an error mode where the subbp stated isn't the first ionode in a nodecard. + -- Fix for backfill to not dump core when checking shared nodes. + -- Fix for scontrol to not dump core when hitting just return in interactive mode. 
* Changes in SLURM 2.0.1 ======================== diff --git a/src/plugins/sched/backfill/backfill.c b/src/plugins/sched/backfill/backfill.c index 6f15b8b6356..f05a4586ff3 100644 --- a/src/plugins/sched/backfill/backfill.c +++ b/src/plugins/sched/backfill/backfill.c @@ -122,7 +122,7 @@ static void _diff_tv_str(struct timeval *tv1,struct timeval *tv2, static bool _more_work(void); static int _num_feature_count(struct job_record *job_ptr); static int _start_job(struct job_record *job_ptr, bitstr_t *avail_bitmap); -static int _try_sched(struct job_record *job_ptr, bitstr_t *avail_bitmap, +static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes); @@ -191,7 +191,7 @@ static int _num_feature_count(struct job_record *job_ptr) * IN/OUT avail_bitmap - nodes available/selected to use * RET SLURM_SUCCESS on success, otherwise an error code */ -static int _try_sched(struct job_record *job_ptr, bitstr_t *avail_bitmap, +static int _try_sched(struct job_record *job_ptr, bitstr_t **avail_bitmap, uint32_t min_nodes, uint32_t max_nodes, uint32_t req_nodes) { @@ -225,12 +225,12 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t *avail_bitmap, } list_iterator_destroy(feat_iter); - if ((job_req_node_filter(job_ptr, avail_bitmap) != + if ((job_req_node_filter(job_ptr, *avail_bitmap) != SLURM_SUCCESS) || - (bit_set_count(avail_bitmap) < high_cnt)) { + (bit_set_count(*avail_bitmap) < high_cnt)) { rc = ESLURM_NODES_BUSY; } else { - rc = select_g_job_test(job_ptr, avail_bitmap, + rc = select_g_job_test(job_ptr, *avail_bitmap, high_cnt, max_nodes, req_nodes, SELECT_MODE_WILL_RUN); } @@ -250,15 +250,15 @@ static int _try_sched(struct job_record *job_ptr, bitstr_t *avail_bitmap, uint16_t orig_shared; orig_shared = job_ptr->details->shared; job_ptr->details->shared = 0; - tmp_bitmap = bit_copy(avail_bitmap); - rc = select_g_job_test(job_ptr, avail_bitmap, min_nodes, + tmp_bitmap = bit_copy(*avail_bitmap); + 
rc = select_g_job_test(job_ptr, *avail_bitmap, min_nodes, max_nodes, req_nodes, SELECT_MODE_WILL_RUN); job_ptr->details->shared = orig_shared; if ((rc != SLURM_SUCCESS) && (orig_shared != 0)) { - FREE_NULL_BITMAP(avail_bitmap); - avail_bitmap= tmp_bitmap; - rc = select_g_job_test(job_ptr, avail_bitmap, + FREE_NULL_BITMAP(*avail_bitmap); + *avail_bitmap= tmp_bitmap; + rc = select_g_job_test(job_ptr, *avail_bitmap, min_nodes, max_nodes, req_nodes, SELECT_MODE_WILL_RUN); } else @@ -462,11 +462,11 @@ static void _attempt_backfill(void) if (job_req_node_filter(job_ptr, avail_bitmap)) continue; /* nodes lack features */ - j = _try_sched(job_ptr, avail_bitmap, + j = _try_sched(job_ptr, &avail_bitmap, min_nodes, max_nodes, req_nodes); if (j != SLURM_SUCCESS) continue; /* not runable */ - + job_ptr->start_time = MAX(job_ptr->start_time, start_res); if (job_ptr->start_time <= now) { int rc = _start_job(job_ptr, resv_bitmap); diff --git a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 4acd56f0053..215966fbc78 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -97,7 +97,6 @@ static int _check_for_booted_overlapping_blocks( bool test_only); static int _dynamically_request(List block_list, int *blocks_added, ba_request_t *request, - bitstr_t* slurm_block_bitmap, char *user_req_nodes); static int _find_best_block_match(List block_list, int *blocks_added, struct job_record* job_ptr, @@ -709,7 +708,6 @@ static int _check_for_booted_overlapping_blocks( static int _dynamically_request(List block_list, int *blocks_added, ba_request_t *request, - bitstr_t* slurm_block_bitmap, char *user_req_nodes) { List list_of_lists = NULL; @@ -1092,7 +1090,6 @@ static int _find_best_block_match(List block_list, if((rc = _dynamically_request(block_list, blocks_added, &request, - slurm_block_bitmap, job_ptr->details->req_nodes)) == SLURM_SUCCESS) { create_try = 1; diff 
--git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index 8b514cc7075..2d1e2749b1a 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -461,14 +461,22 @@ _process_command (int argc, char *argv[]) { int error_code = 0; char *tag = argv[0]; - int taglen = strlen(tag); + int taglen = 0; if (argc < 1) { exit_code = 1; if (quiet_flag == -1) fprintf(stderr, "no input"); + return 0; + } else if(tag) + taglen = strlen(tag); + else { + if (quiet_flag == -1) + fprintf(stderr, "input problem"); + return 0; } - else if (strncasecmp (tag, "abort", MAX(taglen, 5)) == 0) { + +if (strncasecmp (tag, "abort", MAX(taglen, 5)) == 0) { /* require full command name */ if (argc > 2) { exit_code = 1; diff --git a/src/slurmctld/reservation.c b/src/slurmctld/reservation.c index 8b178d7b0bd..95856585267 100644 --- a/src/slurmctld/reservation.c +++ b/src/slurmctld/reservation.c @@ -2337,7 +2337,7 @@ extern int job_test_resv(struct job_record *job_ptr, time_t *when, if (rc == SLURM_SUCCESS) break; - + /* rc == ESLURM_NODES_BUSY here from above break */ if (move_time && (i<10)) { /* Retry for later start time */ bit_nset(*node_bitmap, 0, (node_record_count - 1)); rc = SLURM_SUCCESS; diff --git a/src/slurmctld/reservation.h b/src/slurmctld/reservation.h index f019aba2955..d05ee8ee080 100644 --- a/src/slurmctld/reservation.h +++ b/src/slurmctld/reservation.h @@ -50,10 +50,10 @@ extern time_t last_resv_update; /* Create a resource reservation */ extern int create_resv(resv_desc_msg_t *resv_desc_ptr); -/* Update an exiting resource reservation */ +/* Update an existing resource reservation */ extern int update_resv(resv_desc_msg_t *resv_desc_ptr); -/* Delete an exiting resource reservation */ +/* Delete an existing resource reservation */ extern int delete_resv(reservation_name_msg_t *resv_desc_ptr); /* Dump the reservation records to a buffer */ -- GitLab