From 2af5ce336d24813d498efed9cedcae1d7329ebff Mon Sep 17 00:00:00 2001 From: Rod Schultz <Rod.Schultz@Bull.com> Date: Tue, 8 Jan 2013 10:04:37 -0800 Subject: [PATCH] Report node state as MAINT only if not allocated jobs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One of our testers has observed that when a long-running job continues to run after a maintenance reservation comes into effect, sinfo reports the node as being in the allocated state while scontrol shows it to be in the maintenance state. This can happen when a node is not completely allocated. (select cons_res, a partition which is not Shared=EXCLUSIVE, jobs allocated without --exclusive, or jobs that are allocated only some of the cpus on a node.) Execution paths leading up to calls to node_state_string (slurm_protocol_defs.c) or node_state_string_compact, in scontrol, test for allocated_cpus less than total_cpus on the node and set the node state to MIXED rather than ALLOCATED, while similar paths in sinfo do not. I think this is probably a bug, since the mixed state is defined, and I think it is desirable that both commands return the same result. The problem can be fixed with two logic changes (in multiple places): 1) node_state_string and node_state_string_compact have to check for mixed as well as allocated before returning the MAINT state. This means that the reported state for the node with the allocated job will be MIXED. 2) sinfo must also check for allocated_cpus less than total_cpus and set the state to MIXED before calling either node_state_string or node_state_string_compact. The attached patch (against 2.5.1) makes these changes. The attached script is a test case. 
--- src/common/slurm_protocol_defs.c | 13 +++++++++---- src/sinfo/print.c | 27 ++++++++++++++++++++++----- 2 files changed, 31 insertions(+), 9 deletions(-) diff --git a/src/common/slurm_protocol_defs.c b/src/common/slurm_protocol_defs.c index 5aa62a482b0..a5cdeb82c4b 100644 --- a/src/common/slurm_protocol_defs.c +++ b/src/common/slurm_protocol_defs.c @@ -1440,9 +1440,12 @@ extern char *node_state_string(uint16_t inx) bool power_up_flag = (inx & NODE_STATE_POWER_UP); if (maint_flag) { - if (no_resp_flag) + if ((base == NODE_STATE_ALLOCATED) || + (base == NODE_STATE_MIXED)) + ; + else if (no_resp_flag) return "MAINT*"; - if (base != NODE_STATE_ALLOCATED) + else return "MAINT"; } if (drain_flag) { @@ -1558,9 +1561,11 @@ extern char *node_state_string_compact(uint16_t inx) inx = (uint16_t) (inx & NODE_STATE_BASE); if (maint_flag) { - if (no_resp_flag) + if ((inx == NODE_STATE_ALLOCATED) || (inx == NODE_STATE_MIXED)) + ; + else if (no_resp_flag) return "MAINT*"; - if (inx != NODE_STATE_ALLOCATED) + else return "MAINT"; } if (drain_flag) { diff --git a/src/sinfo/print.c b/src/sinfo/print.c index e4c391779e2..a8c0da53418 100644 --- a/src/sinfo/print.c +++ b/src/sinfo/print.c @@ -988,10 +988,18 @@ int _print_size(sinfo_data_t * sinfo_data, int width, int _print_state_compact(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix) { + char *upper_state, *lower_state; + uint16_t my_state; + if (sinfo_data && sinfo_data->nodes_total) { - char *upper_state = node_state_string_compact( - sinfo_data->node_state); - char *lower_state = _str_tolower(upper_state); + my_state = sinfo_data->node_state; + if (sinfo_data->cpus_alloc && + (sinfo_data->cpus_alloc != sinfo_data->cpus_total)) { + my_state &= NODE_STATE_FLAGS; + my_state |= NODE_STATE_MIXED; + } + upper_state = node_state_string_compact(my_state); + lower_state = _str_tolower(upper_state); _print_str(lower_state, width, right_justify, true); xfree(lower_state); } else if (sinfo_data) @@ -1007,9 +1015,18 @@ 
int _print_state_compact(sinfo_data_t * sinfo_data, int width, int _print_state_long(sinfo_data_t * sinfo_data, int width, bool right_justify, char *suffix) { + char *upper_state, *lower_state; + uint16_t my_state; + if (sinfo_data && sinfo_data->nodes_total) { - char *upper_state = node_state_string(sinfo_data->node_state); - char *lower_state = _str_tolower(upper_state); + my_state = sinfo_data->node_state; + if (sinfo_data->cpus_alloc && + (sinfo_data->cpus_alloc != sinfo_data->cpus_total)) { + my_state &= NODE_STATE_FLAGS; + my_state |= NODE_STATE_MIXED; + } + upper_state = node_state_string(my_state); + lower_state = _str_tolower(upper_state); _print_str(lower_state, width, right_justify, true); xfree(lower_state); } else if (sinfo_data) -- GitLab