From 052080f8be7be23a1fdc01ea93fb801a7b2bc08a Mon Sep 17 00:00:00 2001 From: Moe Jette <jette1@llnl.gov> Date: Wed, 15 Jul 2009 20:38:01 +0000 Subject: [PATCH] svn merge -r18132:18151 https://eris.llnl.gov/svn/slurm/branches/slurm-2.0 --- NEWS | 15 ++++- .../block_allocator/block_allocator.c | 31 +++++++++- .../select/bluegene/plugin/bg_block_info.c | 47 +++++++-------- .../select/bluegene/plugin/bg_job_run.c | 3 +- .../bluegene/plugin/bg_record_functions.c | 58 ++++++++----------- .../select/bluegene/plugin/select_bluegene.c | 22 +++---- src/slurmctld/agent.c | 5 +- 7 files changed, 104 insertions(+), 77 deletions(-) diff --git a/NEWS b/NEWS index 168d3f984ca..19b54c25002 100644 --- a/NEWS +++ b/NEWS @@ -90,6 +90,19 @@ documents those changes that are of interest to users and admins. -- BLUEGENE - add support for scontrol show blocks. -- Added support for job step time limits. +* Changes in SLURM 2.0.5 +======================== + -- BLUEGENE - Added support for emulating systems with a X-dimension of 4. + -- BLUEGENE - When a nodecard goes down on a non-Dynamic system SLURM will + now only drain blocks under 1 midplane, if no such block exists then SLURM + will drain the entire midplane and not mark any block in error state. + Previously SLURM would drain every overlapping block of the nodecard + making it possible for a large block to make other blocks not work since + they overlap some other part of the block that really isn't bad. + -- BLUEGENE - Handle L3 errors on boot better. + -- Don't revoke a pending batch launch request from the slurmctld if the + job is immediately suspended (a normal event with gang scheduling). + * Changes in SLURM 2.0.4 ======================== -- Permit node suspend/resume logic to be enabled through "scontrol reconfig" @@ -135,7 +148,7 @@ documents those changes that are of interest to users and admins. to properly handle user names that contain all digits. 
Return error code from uid_from_string() and gid_from_string() functions rather than a uid of -1, which might be a valid uid or gid on some systems. - -- Fix in re-calcuation of job priorities do to DOWN or DRAINED nodes. + -- Fix in re-calculation of job priorities due to DOWN or DRAINED nodes. * Changes in SLURM 2.0.3 ======================== diff --git a/src/plugins/select/bluegene/block_allocator/block_allocator.c b/src/plugins/select/bluegene/block_allocator/block_allocator.c index 2eb93254d98..9377654475a 100644 --- a/src/plugins/select/bluegene/block_allocator/block_allocator.c +++ b/src/plugins/select/bluegene/block_allocator/block_allocator.c @@ -3779,7 +3779,7 @@ static bool _node_used(ba_node_t* ba_node, int x_size) return true; } } - + return false; } @@ -3986,6 +3986,35 @@ static int _set_external_wires(int dim, int count, ba_node_t* source, /* set up split x */ if(DIM_SIZE[X] == 1) { + } else if(DIM_SIZE[X] == 4) { + switch(count) { + case 0: + case 3: + /* 0 and 3rd Node */ + /* nothing */ + break; + case 1: + /* 1st Node */ + target = &ba_system_ptr->grid[0] + [source->coord[Y]] + [source->coord[Z]]; + /* 4->3 of 0th */ + _switch_config(source, target, dim, 4, 3); + break; + case 2: + /* 2nd Node */ + target = &ba_system_ptr->grid[3] + [source->coord[Y]] + [source->coord[Z]]; + /* 4->3 of 3rd and back */ + _switch_config(source, target, dim, 4, 3); + _switch_config(source, target, dim, 3, 4); + break; + default: + fatal("got %d for a count on a %d X-dim system", + count, DIM_SIZE[X]); + break; + } } else if(DIM_SIZE[X] == 5) { /* 4 X dim fixes for wires */ switch(count) { diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c b/src/plugins/select/bluegene/plugin/bg_block_info.c index 58df0ca635f..3b70f7367dc 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.c +++ b/src/plugins/select/bluegene/plugin/bg_block_info.c @@ -412,7 +412,18 @@ extern int update_block_list() } remove_from_bg_list(bg_lists->booted, bg_record); - } + } else 
if(bg_record->state == RM_PARTITION_ERROR) { + if(bg_record->boot_state == 1) + error("Block %s in an error " + "state while booting.", + bg_record->bg_block_id); + else + error("Block %s in an error state.", + bg_record->bg_block_id); + remove_from_bg_list(bg_lists->booted, + bg_record); + trigger_block_error(); + } updated = 1; } @@ -435,32 +446,14 @@ extern int update_block_list() JOB_CONFIGURING; break; case RM_PARTITION_ERROR: - bg_record->boot_state = 0; - bg_record->boot_count = 0; - if(bg_record->job_running > NO_JOB_RUNNING) { - error("Block %s in an error " - "state while booting. " - "Failing job %u.", - bg_record->bg_block_id, - bg_record->job_running); - freeit = xmalloc( - sizeof(kill_job_struct_t)); - freeit->jobid = bg_record->job_running; - list_push(kill_job_list, freeit); - if(remove_from_bg_list( - bg_lists->job_running, - bg_record) - == SLURM_SUCCESS) { - num_unused_cpus += - bg_record->cpu_cnt; - } - } else - error("block %s in an error " - "state while booting.", - bg_record->bg_block_id); - remove_from_bg_list(bg_lists->booted, - bg_record); - trigger_block_error(); + /* If we get an error on boot that + * means it is a transparent L3 error + * and should be trying to fix + * itself. If this is the case we + * just hang out waiting for the state + * to go to free where we will try to + * boot again below. 
+ */ break; case RM_PARTITION_FREE: if(bg_record->boot_count < RETRY_BOOT_COUNT) { diff --git a/src/plugins/select/bluegene/plugin/bg_job_run.c b/src/plugins/select/bluegene/plugin/bg_job_run.c index 2d3cff7fb13..bbb9f4a06aa 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_run.c +++ b/src/plugins/select/bluegene/plugin/bg_job_run.c @@ -163,7 +163,8 @@ static int _remove_job(db_job_id_t job_id) else if(job_state == RM_JOB_DYING) { if(count > MAX_POLL_RETRIES) error("Job %d isn't dying, trying for " - "%d seconds", count*POLL_INTERVAL); + "%d seconds", job_id, + count*POLL_INTERVAL); continue; } else if(job_state == RM_JOB_ERROR) { error("job %d is in a error state.", job_id); diff --git a/src/plugins/select/bluegene/plugin/bg_record_functions.c b/src/plugins/select/bluegene/plugin/bg_record_functions.c index b3cf6db8a36..055c8b8a63e 100644 --- a/src/plugins/select/bluegene/plugin/bg_record_functions.c +++ b/src/plugins/select/bluegene/plugin/bg_record_functions.c @@ -510,7 +510,7 @@ extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id) itr = list_iterator_create(my_list); while((bg_record = list_next(itr))) { if(bg_record->bg_block_id) - if(!strcmp(bg_record->bg_block_id, bg_block_id)) + if (!strcasecmp(bg_record->bg_block_id, bg_block_id)) break; } list_iterator_destroy(itr); @@ -1067,26 +1067,19 @@ extern int down_nodecard(char *bp_name, bitoff_t io_start) if(bg_record->job_running > NO_JOB_RUNNING) slurm_fail_job(bg_record->job_running); - /* mark every one of these in an error state */ - if(bg_conf->layout_mode != LAYOUT_DYNAMIC) { - if(!delete_list) - delete_list = list_create(NULL); - list_append(delete_list, bg_record); - continue; - } - - /* below is only for dynamic modes since there are - never overlapping blocks there */ - /* if the block is smaller than the create size just - continue on. + /* If Running Dynamic mode and the block is + smaller than the create size just continue on. 
*/ - if(bg_record->node_cnt < create_size) { + if((bg_conf->layout_mode == LAYOUT_DYNAMIC) + && (bg_record->node_cnt < create_size)) { if(!delete_list) delete_list = list_create(NULL); list_append(delete_list, bg_record); continue; } + /* keep track of the smallest size that is at least + the size of create_size. */ if(!smallest_bg_record || (smallest_bg_record->node_cnt > bg_record->node_cnt)) smallest_bg_record = bg_record; @@ -1096,27 +1089,24 @@ extern int down_nodecard(char *bp_name, bitoff_t io_start) if(bg_conf->layout_mode != LAYOUT_DYNAMIC) { debug3("running non-dynamic mode"); - if(delete_list) { - int cnt_set = 0; - /* don't lock here since it is handled inside - the put_block_in_error_state - */ - itr = list_iterator_create(delete_list); - while ((bg_record = list_next(itr))) { - /* we already handled this */ - if(bg_record->state == RM_PARTITION_ERROR) { - rc = SLURM_NO_CHANGE_IN_DATA; - continue; - } - - rc = put_block_in_error_state( - bg_record, BLOCK_ERROR_STATE); - cnt_set++; - } - if(cnt_set) - rc = SLURM_SUCCESS; - list_iterator_destroy(itr); + + /* This should never happen, but just in case... */ + if(delete_list) list_destroy(delete_list); + + /* If we found a block that is smaller or equal to a + midplane we will just mark it in an error state as + opposed to draining the node. 
+ */ + if(smallest_bg_record + && (smallest_bg_record->node_cnt <= bg_conf->bp_node_cnt)){ + if(smallest_bg_record->state == RM_PARTITION_ERROR) { + rc = SLURM_NO_CHANGE_IN_DATA; + goto cleanup; + } + + rc = put_block_in_error_state( + smallest_bg_record, BLOCK_ERROR_STATE); goto cleanup; } diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index 3a1ba5cf4df..97baa5e6902 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -867,17 +867,19 @@ extern int select_p_update_sub_node (update_part_msg_t *part_desc_ptr) rc = SLURM_ERROR; goto end_it; } + /* make sure we are asking for a correct name */ for(i = 0; i < BA_SYSTEM_DIMENSIONS; i++) { - if((part_desc_ptr->name[i] >= '0' - && part_desc_ptr->name[i] <= '9') - || (part_desc_ptr->name[i] >= 'A' - && part_desc_ptr->name[i] <= 'Z')) { - error("update_sub_node: " - "misformatted name given %s", - part_desc_ptr->name); - rc = SLURM_ERROR; - goto end_it; - } + if((part_desc_ptr->name[j+i] >= '0' + && part_desc_ptr->name[j+i] <= '9') + || (part_desc_ptr->name[j+i] >= 'A' + && part_desc_ptr->name[j+i] <= 'Z')) + continue; + + error("update_sub_node: " + "misformatted name given %s", + part_desc_ptr->name); + rc = SLURM_ERROR; + goto end_it; } strncpy(coord, part_desc_ptr->name+j, diff --git a/src/slurmctld/agent.c b/src/slurmctld/agent.c index 1d5262ce488..2a0180752fb 100644 --- a/src/slurmctld/agent.c +++ b/src/slurmctld/agent.c @@ -1476,8 +1476,6 @@ extern void mail_job_info (struct job_record *job_ptr, uint16_t mail_type) return; } -/* return true if the requests is to launch a batch job and the message - * destination is not yet powered up, otherwise return false */ /* Test if a batch launch request should be defered * RET -1: abort the request, pending job cancelled * 0: execute the request now @@ -1503,7 +1501,8 @@ static int _batch_launch_defer(queued_request_t 
*queued_req_ptr) launch_msg_ptr = (batch_job_launch_msg_t *)agent_arg_ptr->msg_args; job_ptr = find_job_record(launch_msg_ptr->job_id); - if ((job_ptr == NULL) || (!IS_JOB_RUNNING(job_ptr))) { + if ((job_ptr == NULL) || + (!IS_JOB_RUNNING(job_ptr) && !IS_JOB_SUSPENDED(job_ptr))) { info("agent(batch_launch): removed pending request for " "cancelled job %u", launch_msg_ptr->job_id); -- GitLab