diff --git a/NEWS b/NEWS index dd21b410d036f1c56748724ecb40d2b9624dc45c..4c4c8a01b6ab0a5a804623e4ca5fe86c113bd036 100644 --- a/NEWS +++ b/NEWS @@ -1,6 +1,11 @@ This file describes changes in recent versions of SLURM. It primarily documents those changes that are of interest to users and admins. +* Changes in SLURM 1.1.0-pre4 +============================= + -- Bluegene specific - Added support to set bluegene block state to + free/error via scontrol update BlockName + * Changes in SLURM 1.1.0-pre3 ============================= -- Added framework for XCPU job launch support. diff --git a/doc/html/bluegene.shtml b/doc/html/bluegene.shtml index 300cee3691a905cc4be069453ed03c89c76f4259..803baa1a2706a76a65033364a1796028d9cef9cf 100644 --- a/doc/html/bluegene.shtml +++ b/doc/html/bluegene.shtml @@ -355,6 +355,12 @@ if resources are avaliable and prevent larger jobs from running. Bgblocks need not be assigned in the <i>bluegene.conf</i> file for this mode.</p> +<p>Blocks can be freed or set in an error state with scontrol. +<br>(i.e. scontrol update BlockName=RMP0 state=error) +<br>This will end any job on the block and set the state of the block to ERROR +making it so no job will run on the block. To set it back to a usable state +set state=free. 
+ <p>One of these modes must be defined in the <i>bluegene.conf</i> file with the option <i>LayoutMode=MODE</i> (where MODE=STATIC, DYNAMIC or OVERLAP).</p> diff --git a/doc/html/selectplugins.shtml b/doc/html/selectplugins.shtml index 5b3e9d9b2c6e422881185ab1a3cc31710140122a..4e5d5983c55f52550d3aaf5c9e358ce2edccf934 100644 --- a/doc/html/selectplugins.shtml +++ b/doc/html/selectplugins.shtml @@ -37,13 +37,13 @@ Note carefully, however, the versioning discussion below.</p> <p>A simplified flow of logic follows: <pre> slurmctld daemon starts -if (<i>select_p_state_restor)</i>() != SLURM_SUCCESS) +if (<i>select_p_state_restore)</i>() != SLURM_SUCCESS) abort slurmctld reads the rest of its configuration and state information if (<i>select_p_node_init</i>() != SLURM_SUCCESS) abort -if (<i>select_p_part_init</i>() != SLURM_SUCCESS) +if (<i>select_p_block_init</i>() != SLURM_SUCCESS) abort wait for job @@ -132,7 +132,7 @@ of node data records.</p> <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR, causing slurmctld to exit.</p> -<p class="commandline">int select_p_part_init (List part_list);</p> +<p class="commandline">int select_p_block_init (List block_list);</p> <p style="margin-left:.2in"><b>Description</b>: Note the initialization of the partition record data structure. This function is called when the partition records are initially established and again when any partition configurations change. </p> @@ -143,6 +143,14 @@ consider that nodes can be removed from one partition and added to a different p <p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, the plugin should return SLURM_ERROR, causing slurmctld to exit.</p> +<p class="commandline">int select_p_update_block (update_part_msg_t *part_desc_ptr);</p> +<p style="margin-left:.2in"><b>Description</b>: This function is called when the admin needs to manually update the state of a block. 
</p> +<p style="margin-left:.2in"><b>Arguments</b>: +<span class="commandline"> part_desc_ptr</span> (input) partition +description variable. Containing the block name and the state to set the block.</p> +<p style="margin-left:.2in"><b>Returns</b>: SLURM_SUCCESS if successful. On failure, +the plugin should return SLURM_ERROR.</p> + <p class="commandline">int select_p_pack_node_info (time_t last_query_time, Buf *buffer_ptr);</p> <p style="margin-left:.2in"><b>Description</b>: pack node specific information into a buffer.</p> <p style="margin-left:.2in"><b>Arguments</b>: diff --git a/doc/man/man1/scontrol.1 b/doc/man/man1/scontrol.1 index bfff655cdef4a24ac9cc1503a32e9b1786b71a6d..c9ca248da98798695281cbd869e9dd31498d4408 100644 --- a/doc/man/man1/scontrol.1 +++ b/doc/man/man1/scontrol.1 @@ -343,6 +343,20 @@ in the partition. Specify a number or "INFINITE". \fIMinNodes\fP=<count> Set the minimum number of nodes which will be allocated to any single job in the partition. +.TP +\fBSPECIFICATIONS FOR UPDATE, BLOCK \fR +.TP +Bluegene systems only! +.TP +\fIBlockName\fP=<name> +Identify the bluegene block to be updated. This specification is required. +.TP +\fIState\fP=<free|error> +This will update the state of a bluegene block to either FREE or ERROR. +(i.e. update BlockName=RMP0 STATE=ERROR) State error will not allow jobs +to run on the block. \fBWARNING!!!!\fR This will cancel any +running job on the block! 
+.TP .SH "ENVIRONMENT VARIABLES" .PP Some \fBscontrol\fR options may diff --git a/src/common/node_select.c b/src/common/node_select.c index bd2ac2c5ceb9a587915100947d7742fda983d67b..53ae1561fb220993297ecb27ce64dbd07dd454e3 100644 --- a/src/common/node_select.c +++ b/src/common/node_select.c @@ -58,37 +58,39 @@ */ typedef struct slurm_select_ops { - int (*state_save) ( char *dir_name ); - int (*state_restore) ( char *dir_name ); - int (*job_init) ( List job_list ); - int (*node_init) ( struct node_record *node_ptr, - int node_cnt); - int (*part_init) ( List part_list ); - int (*job_test) ( struct job_record *job_ptr, - bitstr_t *bitmap, - int min_nodes, - int max_nodes, - bool test_only); - int (*job_begin) ( struct job_record *job_ptr ); - int (*job_ready) ( struct job_record *job_ptr ); - int (*job_fini) ( struct job_record *job_ptr ); - int (*job_suspend) ( struct job_record *job_ptr ); - int (*job_resume) ( struct job_record *job_ptr ); - int (*pack_node_info) ( time_t last_query_time, - Buf *buffer_ptr); - int (*get_extra_jobinfo) ( struct node_record *node_ptr, - struct job_record *job_ptr, - enum select_data_info cr_info, - void *data); - int (*get_select_nodeinfo) ( struct node_record *node_ptr, - enum select_data_info cr_info, - void *data); - int (*update_nodeinfo) ( struct job_record *job_ptr, - enum select_data_info cr_info); - int (*get_info_from_plugin) ( enum select_data_info cr_info, - void *data); - int (*alter_node_cnt) ( enum select_node_cnt type, - void *data); + int (*state_save) (char *dir_name); + int (*state_restore) (char *dir_name); + int (*job_init) (List job_list); + int (*node_init) (struct node_record *node_ptr, + int node_cnt); + int (*block_init) (List block_list); + int (*job_test) (struct job_record *job_ptr, + bitstr_t *bitmap, + int min_nodes, + int max_nodes, + bool test_only); + int (*job_begin) (struct job_record *job_ptr); + int (*job_ready) (struct job_record *job_ptr); + int (*job_fini) (struct job_record *job_ptr); + int 
(*job_suspend) (struct job_record *job_ptr); + int (*job_resume) (struct job_record *job_ptr); + int (*pack_node_info) (time_t last_query_time, + Buf *buffer_ptr); + int (*get_extra_jobinfo) (struct node_record *node_ptr, + struct job_record *job_ptr, + enum select_data_info cr_info, + void *data); + int (*get_select_nodeinfo) (struct node_record *node_ptr, + enum select_data_info cr_info, + void *data); + int (*update_nodeinfo) (struct job_record *job_ptr, + enum select_data_info cr_info); + int (*update_block) (update_part_msg_t + *part_desc_ptr); + int (*get_info_from_plugin)(enum select_data_info cr_info, + void *data); + int (*alter_node_cnt) (enum select_node_cnt type, + void *data); } slurm_select_ops_t; typedef struct slurm_select_context { @@ -107,16 +109,16 @@ static pthread_mutex_t g_select_context_lock = # define JOBINFO_MAGIC 0x83ac struct select_jobinfo { uint16_t geometry[SYSTEM_DIMENSIONS]; /* node count in various - * dimensions, e.g. X, Y, and Z */ + * dimensions, e.g. 
XYZ */ uint16_t conn_type; /* see enum connection_type */ uint16_t rotate; /* permit geometry rotation if set */ uint16_t node_use; /* see enum node_use_type */ - char *bg_block_id; /* Blue Gene partition ID */ + char *bg_block_id; /* Blue Gene block ID */ uint16_t magic; /* magic number */ uint16_t quarter; /* for bg to tell which quarter of a small - * partition the job is running */ + * block the job is running */ uint16_t nodecard; /* for bg to tell which nodecard of a quarter - * of a small partition the job is running */ + * of a small block the job is running */ uint32_t node_cnt; /* how many cnodes in block */ uint16_t altered; /* see if we have altered this job * or not yet */ @@ -155,6 +157,7 @@ static slurm_select_ops_t * _select_get_ops(slurm_select_context_t *c) "select_p_get_extra_jobinfo", "select_p_get_select_nodeinfo", "select_p_update_nodeinfo", + "select_p_update_block", "select_p_get_info_from_plugin", "select_p_alter_node_cnt" }; @@ -337,15 +340,15 @@ extern int select_g_node_init(struct node_record *node_ptr, int node_cnt) /* - * Note re/initialization of partition record data structure - * IN part_list - list of partition records + * Note re/initialization of block record data structure + * IN block_list - list of partition records */ -extern int select_g_block_init(List part_list) +extern int select_g_block_init(List block_list) { if (slurm_select_init() < 0) return SLURM_ERROR; - return (*(g_select_context->ops.part_init))(part_list); + return (*(g_select_context->ops.block_init))(block_list); } /* @@ -403,6 +406,19 @@ extern int select_g_update_nodeinfo (struct job_record *job_ptr, return (*(g_select_context->ops.update_nodeinfo))(job_ptr, cr_info); } +/* + * Update specific block (usually something has gone wrong) + * IN cr_info - type of data to update for a given job record + * IN part_desc_ptr - information about the block + */ +extern int select_g_update_block (update_part_msg_t *part_desc_ptr) +{ + if (slurm_select_init() < 0) + 
return SLURM_ERROR; + + return (*(g_select_context->ops.update_block))(part_desc_ptr); +} + /* * Get select data from a plugin * IN node_pts - current node record @@ -856,7 +872,7 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, switch (mode) { case SELECT_PRINT_HEAD: snprintf(buf, size, - "CONNECT ROTATE MAX_PROCS GEOMETRY PART_ID"); + "CONNECT ROTATE MAX_PROCS GEOMETRY BLOCK_ID"); break; case SELECT_PRINT_DATA: convert_to_kilo(jobinfo->max_procs, tmp_char); @@ -872,7 +888,7 @@ extern char *select_g_sprint_jobinfo(select_jobinfo_t jobinfo, convert_to_kilo(jobinfo->max_procs, tmp_char); snprintf(buf, size, "Connection=%s Rotate=%s MaxProcs=%s " - "Geometry=%ux%ux%u Part_ID=%s", + "Geometry=%ux%ux%u Block_ID=%s", _job_conn_type_string(jobinfo->conn_type), _job_rotate_string(jobinfo->rotate), tmp_char, diff --git a/src/common/node_select.h b/src/common/node_select.h index 5da7008bbd108e322b75a0a8c7c3b546988ed6df..a32a71318a9eee8dc44efadf7b36fab4f84cdf96 100644 --- a/src/common/node_select.h +++ b/src/common/node_select.h @@ -85,6 +85,13 @@ extern int select_g_get_select_nodeinfo (struct node_record *node_ptr, extern int select_g_update_nodeinfo (struct job_record *job_ptr, enum select_data_info cr_info); +/* + * Update specific block (usually something has gone wrong) + * IN cr_info - type of data to update for a given job record + * IN part_desc_ptr - information about the block + */ +extern int select_g_update_block (update_part_msg_t *part_desc_ptr); + /* * Get select data from a plugin * IN node_pts - current node record @@ -104,7 +111,7 @@ extern int select_g_alter_node_cnt (enum select_node_cnt type, void *data); * Note re/initialization of partition record data structure * IN part_list - list of partition records */ -extern int select_g_part_init(List part_list); +extern int select_g_block_init(List part_list); /* * Note the initialization of job records, issued upon restart of diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.c 
b/src/plugins/select/bluegene/plugin/bg_block_info.c index 5a867a8c3788257bdbb6c156f5d53d6c8ace02d1..841979b2060bbafd5cbead5fc7a54c080d62bd89 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.c +++ b/src/plugins/select/bluegene/plugin/bg_block_info.c @@ -60,7 +60,6 @@ #ifdef HAVE_BG_FILES static int _block_is_deallocating(bg_record_t *bg_record); -static void _drain_as_needed(char *node_list, char *reason); static int _block_is_deallocating(bg_record_t *bg_record) { @@ -122,37 +121,9 @@ static int _block_is_deallocating(bg_record_t *bg_record) return SLURM_SUCCESS; } - -/* If any nodes in node_list are drained, draining, or down, - * then just return - * else drain all of the nodes - * This function lets us drain an entire bgblock only if - * we have not already identified a specific node as bad. */ -static void _drain_as_needed(char *node_list, char *reason) -{ - bool needed = true; - hostlist_t hl; - char *host; - - /* scan node list */ - hl = hostlist_create(node_list); - if (!hl) { - slurm_drain_nodes(node_list, reason); - return; - } - while ((host = hostlist_shift(hl))) { - if (node_already_down(host)) { - needed = false; - break; - } - } - hostlist_destroy(hl); - - if (needed) - slurm_drain_nodes(node_list, reason); -} #endif + /* * check to see if partition is ready to execute. 
Meaning * User is added to the list of users able to run, and no one @@ -362,7 +333,8 @@ extern int update_block_list() updated = -1; slurm_mutex_unlock(&block_state_mutex); break; - } else if(bg_record->state != state) { + } else if(bg_record->job_running != -3 //plugin set error + && bg_record->state != state) { debug("state of Partition %s was %d and now is %d", bg_record->bg_block_id, bg_record->state, @@ -417,10 +389,12 @@ extern int update_block_list() bg_record->boot_count); bg_record->boot_count++; } else { - error("Couldn't boot Partition %s " + error("Couldn't boot Block %s " "for user %s", bg_record->bg_block_id, bg_record->target_name); + slurm_mutex_unlock(&block_state_mutex); + now = time(NULL); time_ptr = localtime(&now); strftime(reason, sizeof(reason), @@ -428,8 +402,8 @@ extern int update_block_list() "Boot fails " "[SLURM@%b %d %H:%M]", time_ptr); - _drain_as_needed(bg_record->nodes, - reason); + drain_as_needed(bg_record, reason); + slurm_mutex_lock(&block_state_mutex); bg_record->boot_state = 0; bg_record->boot_count = 0; } diff --git a/src/plugins/select/bluegene/plugin/bg_block_info.h b/src/plugins/select/bluegene/plugin/bg_block_info.h index 5143a30a0639095c87265641d45506e3b15804c3..631c9ed4ac296bacc2cb5fa3ddb8ec7393505577 100644 --- a/src/plugins/select/bluegene/plugin/bg_block_info.h +++ b/src/plugins/select/bluegene/plugin/bg_block_info.h @@ -29,8 +29,8 @@ #include "bluegene.h" /*****************************************************/ -extern int part_ready(struct job_record *job_ptr); -extern void pack_partition(bg_record_t *bg_record, Buf buffer); -extern int unpack_partition(bg_info_record_t *bg_info_record, Buf buffer); -extern int update_partition_list(); +extern int block_ready(struct job_record *job_ptr); +extern void pack_block(bg_record_t *bg_record, Buf buffer); +extern int unpack_block(bg_info_record_t *bg_info_record, Buf buffer); +extern int update_block_list(); #endif /* _BG_PART_INFO_H_ */ diff --git 
a/src/plugins/select/bluegene/plugin/bg_job_place.c b/src/plugins/select/bluegene/plugin/bg_job_place.c index 019d4f8e675d4156de6e176027f66bd19d3fe34f..de7c52f1bf0c31b574dccf0a881afc49426f435f 100644 --- a/src/plugins/select/bluegene/plugin/bg_job_place.c +++ b/src/plugins/select/bluegene/plugin/bg_job_place.c @@ -140,8 +140,9 @@ try_again: */ debug3("%s job_running = %d", record->bg_block_id, record->job_running); - /*partition is being destroyed, ignore it*/ - if(record->job_running == -2) + /*partition is being destroyed (-2), + or is messed up some how (-3) ignore it*/ + if(record->job_running < -1) continue; else if((record->job_running != -1) && !test_only) { @@ -153,10 +154,6 @@ try_again: continue; } - /* if(!test_only && bluegene_layout_mode == LAYOUT_OVERLAP) { */ -/* if(!created && record->state != RM_PARTITION_READY) */ -/* continue; */ -/* } */ /* Check processor count */ proc_cnt = record->bp_count * record->cpus_per_bp; debug3("asking for %d-%d looking at %d", @@ -237,13 +234,25 @@ try_again: } } if(!test_only - && (found_record->job_running > -1)) { - debug("can't use %s, there is a job " - "(%d) running on an overlapping " - "block %s", - record->bg_block_id, - found_record->job_running, - found_record->bg_block_id); + && ((found_record->job_running > -1) + || (found_record->job_running == -3))) { + if(found_record->job_running > -1) + debug("can't use %s, there is " + "a job (%d) running on " + "an overlapping " + "block %s", + record->bg_block_id, + found_record-> + job_running, + found_record-> + bg_block_id); + else + error("can't use %s, " + "overlapping block %s " + "is in an error state.", + record->bg_block_id, + found_record-> + bg_block_id); if(bluegene_layout_mode == LAYOUT_DYNAMIC) { temp_list = list_create(NULL); @@ -253,7 +262,7 @@ try_again: list_destroy(temp_list); } break; - } + } } } list_iterator_destroy(itr2); diff --git a/src/plugins/select/bluegene/plugin/bluegene.c b/src/plugins/select/bluegene/plugin/bluegene.c index 
e04c9a222f051ab565d7ef65e6579ef6d695860c..4c2679f95ce9dba02b0cafdef2ed0c03cc994416 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.c +++ b/src/plugins/select/bluegene/plugin/bluegene.c @@ -527,6 +527,52 @@ extern int update_block_user(bg_record_t *bg_record, int set) return 0; } +/* If any nodes in node_list are drained, draining, or down, + * then just return + * else drain all of the nodes + * This function lets us drain an entire bgblock only if + * we have not already identified a specific node as bad. */ +extern void drain_as_needed(bg_record_t *bg_record, char *reason) +{ + bool needed = true; + hostlist_t hl; + char *host = NULL; + + if(bg_record->job_running > -1) + slurm_fail_job(bg_record->job_running); + + /* small blocks */ + if(bg_record->cpus_per_bp != procs_per_node) { + info("small block"); + while(bg_record->job_running > -1) + sleep(1); + slurm_mutex_lock(&block_state_mutex); + bg_record->job_running = -3; + bg_record->state = RM_PARTITION_ERROR; + slurm_mutex_unlock(&block_state_mutex); + return; + } + + /* at least one base partition */ + hl = hostlist_create(bg_record->nodes); + if (!hl) { + slurm_drain_nodes(bg_record->nodes, reason); + return; + } + while ((host = hostlist_shift(hl))) { + if (node_already_down(host)) { + needed = false; + free(host); + break; + } + free(host); + } + hostlist_destroy(hl); + + if (needed) + slurm_drain_nodes(bg_record->nodes, reason); +} + extern int format_node_name(bg_record_t *bg_record, char tmp_char[]) { if(bg_record->quarter != (uint16_t)NO_VAL) { diff --git a/src/plugins/select/bluegene/plugin/bluegene.h b/src/plugins/select/bluegene/plugin/bluegene.h index 3a5f3e51b8d0e7744d49a830774605a19e03f6b4..09e230464134ef2e3f0c0787d981d6ead2a4546f 100644 --- a/src/plugins/select/bluegene/plugin/bluegene.h +++ b/src/plugins/select/bluegene/plugin/bluegene.h @@ -170,6 +170,7 @@ extern bg_record_t *find_bg_record_in_list(List my_list, char *bg_block_id); updated before call of function. 
*/ extern int update_block_user(bg_record_t *bg_block_id, int set); +extern void drain_as_needed(bg_record_t *bg_record, char *reason); extern int format_node_name(bg_record_t *bg_record, char tmp_char[]); extern bool blocks_overlap(bg_record_t *rec_a, bg_record_t *rec_b); diff --git a/src/plugins/select/bluegene/plugin/select_bluegene.c b/src/plugins/select/bluegene/plugin/select_bluegene.c index b90c56f463b7108e041069d24871c848efacdbce..e0bef64977a7be24bcde6c475a8848d571fdcf28 100644 --- a/src/plugins/select/bluegene/plugin/select_bluegene.c +++ b/src/plugins/select/bluegene/plugin/select_bluegene.c @@ -72,6 +72,8 @@ static pthread_mutex_t thread_flag_mutex = PTHREAD_MUTEX_INITIALIZER; /** initialize the status pthread */ static int _init_status_pthread(void); +static int _wait_for_thread (pthread_t thread_id); +static char *_block_state_str(int state); /* * init() is called when the plugin is loaded, before any other functions @@ -142,6 +144,23 @@ static int _wait_for_thread (pthread_t thread_id) return SLURM_ERROR; } +static char *_block_state_str(int state) +{ + static char tmp[16]; + +#ifdef HAVE_BG + switch (state) { + case 0: + return "ERROR"; + case 1: + return "FREE"; + } +#endif + + snprintf(tmp, sizeof(tmp), "%d", state); + return tmp; +} + extern int fini ( void ) { int rc = SLURM_SUCCESS; @@ -339,6 +358,52 @@ extern int select_p_update_nodeinfo (struct job_record *job_ptr, return SLURM_SUCCESS; } +extern int select_p_update_part (update_part_msg_t *part_desc_ptr) +{ + int rc = SLURM_SUCCESS; + bg_record_t *bg_record = NULL; + time_t now; + struct tm *time_ptr; + char reason[128]; + char time_str[64]; + + bg_record = find_bg_record_in_list(bg_list, part_desc_ptr->name); + if(!bg_record) + return SLURM_ERROR; + now = time(NULL); + time_ptr = localtime(&now); + strftime(time_str, sizeof(time_str), + "[SLURM@%b %d %H:%M]", + time_ptr); + snprintf(reason, sizeof(reason), + "update_block: " + "Admin set block %s state to %s %s", + bg_record->bg_block_id, + 
_block_state_str(part_desc_ptr->state_up), + time_str); + if(bg_record->job_running > -1) { + slurm_fail_job(bg_record->job_running); + while(bg_record->job_running > -1) + sleep(1); + } + if(!part_desc_ptr->state_up) { + slurm_mutex_lock(&block_state_mutex); + bg_record->job_running = -3; + bg_record->state = RM_PARTITION_ERROR; + slurm_mutex_unlock(&block_state_mutex); + } else if(part_desc_ptr->state_up){ + slurm_mutex_lock(&block_state_mutex); + bg_record->job_running = -1; + bg_record->state = RM_PARTITION_FREE; + slurm_mutex_unlock(&block_state_mutex); + } else { + return rc; + } + info("%s",reason); + last_bg_update = time(NULL); + return rc; +} + extern int select_p_get_extra_jobinfo (struct node_record *node_ptr, struct job_record *job_ptr, enum select_data_info info, diff --git a/src/plugins/select/bluegene/plugin/state_test.c b/src/plugins/select/bluegene/plugin/state_test.c index 1b4b53ae49b574e009691501fca62113eef67ab9..e4d454371b552815b78681b1f60a0f135c93c35c 100644 --- a/src/plugins/select/bluegene/plugin/state_test.c +++ b/src/plugins/select/bluegene/plugin/state_test.c @@ -43,25 +43,6 @@ #ifdef HAVE_BG_FILES -/* Determine if specific slurm node is already in DOWN or DRAIN state */ -extern bool node_already_down(char *node_name) -{ - uint16_t base_state; - struct node_record *node_ptr = find_node_record(node_name); - - if (node_ptr) { - base_state = node_ptr->node_state & - (~NODE_STATE_NO_RESPOND); - if ((base_state == NODE_STATE_DOWN) - || (base_state == NODE_STATE_DRAIN)) - return true; - else - return false; - } - - return false; -} - /* Find the specified BlueGene node ID and drain it from SLURM */ static void _configure_node_down(rm_bp_id_t bp_id, rm_BGL_t *bg) { @@ -305,6 +286,25 @@ static void _test_down_switches(rm_BGL_t *bg) } #endif +/* Determine if specific slurm node is already in DOWN or DRAIN state */ +extern bool node_already_down(char *node_name) +{ + uint16_t base_state; + struct node_record *node_ptr = find_node_record(node_name); 
+ + if (node_ptr) { + base_state = node_ptr->node_state & + (~NODE_STATE_NO_RESPOND); + if ((base_state == NODE_STATE_DOWN) + || (base_state == NODE_STATE_DRAIN)) + return true; + else + return false; + } + + return false; +} + /* * Search MMCS for failed switches and nodes. Failed resources are DRAINED in * SLURM. This relies upon rm_get_BG(), which is slow (10+ seconds) so run diff --git a/src/plugins/select/cons_res/select_cons_res.c b/src/plugins/select/cons_res/select_cons_res.c index b1e90a603808806d344248bb523da3e2a00927e0..c2b672b6f12cd0269ff62976857602b3e9ea82a1 100644 --- a/src/plugins/select/cons_res/select_cons_res.c +++ b/src/plugins/select/cons_res/select_cons_res.c @@ -1266,6 +1266,11 @@ extern int select_p_update_nodeinfo(struct job_record *job_ptr, return rc; } +extern int select_p_update_part (update_part_msg_t *part_desc_ptr) +{ + return SLURM_SUCCESS; +} + extern int select_p_get_info_from_plugin(enum select_data_info info, void *data) { diff --git a/src/plugins/select/linear/select_linear.c b/src/plugins/select/linear/select_linear.c index 3d7703cba24f4009b8a4c87d3c71044625bb6791..f32153f659021adb02540ba8c79c11bdfb081714 100644 --- a/src/plugins/select/linear/select_linear.c +++ b/src/plugins/select/linear/select_linear.c @@ -627,6 +627,11 @@ extern int select_p_update_nodeinfo (struct job_record *job_ptr, return SLURM_SUCCESS; } +extern int select_p_update_part (update_part_msg_t *part_desc_ptr) +{ + return SLURM_SUCCESS; +} + extern int select_p_get_extra_jobinfo (struct node_record *node_ptr, struct job_record *job_ptr, enum select_data_info info, diff --git a/src/scontrol/scontrol.c b/src/scontrol/scontrol.c index e09da89c17196940a0051a4811f261cb37b354de..8ca829a8c7a723b4f3af3e31e5eed1eb8134adf6 100644 --- a/src/scontrol/scontrol.c +++ b/src/scontrol/scontrol.c @@ -113,6 +113,7 @@ static void _update_it (int argc, char *argv[]); static int _update_job (int argc, char *argv[]); static int _update_node (int argc, char *argv[]); static int 
_update_part (int argc, char *argv[]); +static int _update_bluegene_block (int argc, char *argv[]); static void _usage (); int @@ -426,8 +427,8 @@ _load_partitions (partition_info_msg_t **part_buffer_pptr) } } else - error_code = slurm_load_partitions ((time_t) NULL, - &part_info_ptr, show_flags); + error_code = slurm_load_partitions((time_t) NULL, + &part_info_ptr, show_flags); if (error_code == SLURM_SUCCESS) { old_part_info_ptr = part_info_ptr; @@ -1361,15 +1362,17 @@ _update_it (int argc, char *argv[]) if (strncasecmp (argv[i], "NodeName=", 9) == 0) { error_code = _update_node (argc, argv); break; - } - else if (strncasecmp (argv[i], "PartitionName=", 14) == 0) { + } else if (strncasecmp (argv[i], "PartitionName=", 14) == 0) { error_code = _update_part (argc, argv); break; - } - else if (strncasecmp (argv[i], "JobId=", 6) == 0) { + } else if (strncasecmp (argv[i], "JobId=", 6) == 0) { error_code = _update_job (argc, argv); break; + } else if (strncasecmp (argv[i], "BlockName=", 10) == 0) { + error_code = _update_bluegene_block (argc, argv); + break; } + } if (i >= argc) { @@ -1892,6 +1895,55 @@ _update_part (int argc, char *argv[]) return 0; } +/* + * _update_bluegene_block - update the bluegene block per the + * supplied arguments + * IN argc - count of arguments + * IN argv - list of arguments + * RET 0 if no slurm error, errno otherwise. 
parsing error prints + * error message and returns 0 + */ +static int +_update_bluegene_block (int argc, char *argv[]) +{ +#ifdef HAVE_BG + int i, update_cnt = 0; + update_part_msg_t part_msg; + + slurm_init_part_desc_msg ( &part_msg ); + /* means this is for bluegene */ + part_msg.hidden = (uint16_t)INFINITE; + + for (i=0; i<argc; i++) { + if (strncasecmp(argv[i], "BlockName=", 10) == 0) + part_msg.name = &argv[i][10]; + else if (strncasecmp(argv[i], "State=", 6) == 0) { + if (strcasecmp(&argv[i][6], "ERROR") == 0) + part_msg.state_up = 0; + else if (strcasecmp(&argv[i][6], "FREE") == 0) + part_msg.state_up = 1; + else { + exit_code = 1; + fprintf (stderr, "Invalid input: %s\n", + argv[i]); + fprintf (stderr, "Acceptable State values " + "are FREE and ERROR\n"); + return 0; + } + update_cnt++; + } + } + if (slurm_update_partition(&part_msg)) { + exit_code = 1; + return slurm_get_errno (); + } else + return 0; +#else + printf("This only works on a bluegene system.\n"); + return 0; +#endif +} + /* _usage - show the valid scontrol commands */ void _usage () { diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 9380120f0e27c158ddc58394738cf5fd159862e7..8f2b91466fae72b7cfec20dd7791dc43e4b4ad3f 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -1795,9 +1795,13 @@ static void _slurm_rpc_update_partition(slurm_msg_t * msg) if (error_code == SLURM_SUCCESS) { /* do RPC call */ - lock_slurmctld(part_write_lock); - error_code = update_part(part_desc_ptr); - unlock_slurmctld(part_write_lock); + if(part_desc_ptr->hidden == (uint16_t)INFINITE) + error_code = select_g_update_block(part_desc_ptr); + else { + lock_slurmctld(part_write_lock); + error_code = update_part(part_desc_ptr); + unlock_slurmctld(part_write_lock); + } END_TIMER; }