diff --git a/NEWS b/NEWS index 3ecd66bc08a78127b4216474e80301440a1595cb..8e4f365d054d76112ac1e932091201e88691ddaa 100644 --- a/NEWS +++ b/NEWS @@ -88,6 +88,8 @@ documents those changes that are of interest to users and admins. * Changes in Slurm 14.03.5 ========================== + -- Enlarge the width specifier when printing partition SHARE + to display larger sharing values. * Changes in Slurm 14.03.4 ========================== @@ -192,6 +194,9 @@ documents those changes that are of interest to users and admins. jobs. -- For "scontrol --details show job" report the correct CPU_IDs when thre are multiple threads per core (we are translating a core bitmap to CPU IDs). + -- If DebugFlags=Protocol is configured in slurm.conf, print details of the + connection, IP address and port accepted by the controller. + -- Fix minor memory leak when reading in incomplete node data checkpoint file. * Changes in Slurm 14.03.3-2 ============================ diff --git a/doc/man/man1/sinfo.1 b/doc/man/man1/sinfo.1 index 97cc8b68adac97c11c8cc12c453f0daa8bd73416..60db3b51ec3f2b9e6c07e3c46ca1eeb465463c74 100644 --- a/doc/man/man1/sinfo.1 +++ b/doc/man/man1/sinfo.1 @@ -92,7 +92,7 @@ when running with various options are "%9P %.5a %.10l %.16F %N" .TP .I "\-\-long" -"%9P %.5a %.10l %.10s %.4r %.5h %.10g %.6D %.11T %N" +"%9P %.5a %.10l %.10s %.4r %.8h %.10g %.6D %.11T %N" .TP .I "\-\-Node" "%N %.6D %.9P %6t" diff --git a/src/plugins/select/cons_res/job_test.c b/src/plugins/select/cons_res/job_test.c index 7eddf74dc680f6a7c5454864a9a15b7621cfcf23..5f12aec4ef72bd2799621931c8e5d96af21b2b25 100644 --- a/src/plugins/select/cons_res/job_test.c +++ b/src/plugins/select/cons_res/job_test.c @@ -721,8 +721,7 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, /* non-resource-sharing node check */ } else if (node_usage[i].node_state >= NODE_CR_ONE_ROW) { - if ((job_node_req == NODE_CR_RESERVED) || - (job_node_req == NODE_CR_AVAILABLE)) { + if (job_node_req == NODE_CR_RESERVED) { 
debug3("cons_res: _vns: node %s non-sharing", node_ptr->name); goto clear_bit; diff --git a/src/plugins/select/serial/job_test.c b/src/plugins/select/serial/job_test.c index 61f6cadf64f698df23b1d51d52ddf5c52ea42bfd..8c288099fa0a8a279297eca0c7a3fee565f5d1e2 100644 --- a/src/plugins/select/serial/job_test.c +++ b/src/plugins/select/serial/job_test.c @@ -285,8 +285,7 @@ static int _verify_node_state(struct part_res_record *cr_part_ptr, /* non-resource-sharing node check */ } else if (node_usage[i].node_state >= NODE_CR_ONE_ROW) { - if ((job_node_req == NODE_CR_RESERVED) || - (job_node_req == NODE_CR_AVAILABLE)) { + if (job_node_req == NODE_CR_RESERVED) { debug3("select/serial: node %s non-sharing", node_ptr->name); goto clear_bit; diff --git a/src/sinfo/opts.c b/src/sinfo/opts.c index cae1e3eda5ddd6a32014f10c30657cb0ee3b3256..ab91ad61d517674c8f6f708d3f0c80e740e3a837 100644 --- a/src/sinfo/opts.c +++ b/src/sinfo/opts.c @@ -287,7 +287,7 @@ extern void parse_command_line(int argc, char *argv[]) } else { params.part_field_flag = true; /* compute size later */ params.format = params.long_output ? 
- "%9P %.5a %.10l %.10s %.4r %.5h %.10g %.6D %.11T %N" : + "%9P %.5a %.10l %.10s %.4r %.8h %.10g %.6D %.11T %N" : "%9P %.5a %.10l %.6D %.6t %N"; } } diff --git a/src/slurmctld/controller.c b/src/slurmctld/controller.c index f1581cfff024f2551f26b1ea2694f7fd28a281a6..5f45fe196d1b39a4f370db61456eab8dd1544b60 100644 --- a/src/slurmctld/controller.c +++ b/src/slurmctld/controller.c @@ -87,6 +87,7 @@ #include "src/common/uid.h" #include "src/common/xsignal.h" #include "src/common/xstring.h" +#include "src/common/slurm_protocol_interface.h" #include "src/slurmctld/acct_policy.h" #include "src/slurmctld/agent.h" @@ -993,6 +994,15 @@ static void *_slurmctld_rpc_mgr(void *no_data) conn_arg->newsockfd = newsockfd; memcpy(&conn_arg->cli_addr, &cli_addr, sizeof(slurm_addr_t)); + if (slurmctld_conf.debug_flags & DEBUG_FLAG_PROTOCOL) { + char inetbuf[64]; + + _slurm_print_slurm_addr(&cli_addr, + inetbuf, + sizeof(inetbuf)); + info("%s: accept() connection from %s", __func__, inetbuf); + } + if (slurmctld_config.shutdown_time) no_thread = 1; else if (pthread_create(&thread_id_rpc_req, diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c index 4cc5e1ac5f50be48c4dbbf45e48282dd6d63f83c..d07b2a8e7cb47e7835d633e22289955918d23d96 100644 --- a/src/slurmctld/node_mgr.c +++ b/src/slurmctld/node_mgr.c @@ -643,7 +643,9 @@ unpack_error: list_destroy(gres_list); gres_list = NULL; } - xfree (node_name); + xfree(comm_name); + xfree(node_hostname); + xfree(node_name); xfree(reason); goto fini; } diff --git a/src/slurmctld/node_scheduler.c b/src/slurmctld/node_scheduler.c index b1f12dae84c1ee214ffd3cc3ef1ea43ea3bb6bf7..44eeb9602b4013d4c9992d8d63f4b6c5b1dc7210 100644 --- a/src/slurmctld/node_scheduler.c +++ b/src/slurmctld/node_scheduler.c @@ -684,8 +684,8 @@ static int _match_feature(char *seek, struct node_set *node_set_ptr) * 1 = exclusive * * Return values: - * 0 = no sharing - * 1 = share resources + * 0 = requires idle nodes + * 1 = can use non-idle nodes */ static int 
_resolve_shared_status(struct job_record *job_ptr, uint16_t part_max_share, @@ -694,31 +694,36 @@ _resolve_shared_status(struct job_record *job_ptr, uint16_t part_max_share, /* no sharing if partition Shared=EXCLUSIVE */ if (part_max_share == 0) { job_ptr->details->whole_node = 1; + job_ptr->details->share_res = 0; return 0; } /* sharing if partition Shared=FORCE with count > 1 */ if ((part_max_share & SHARED_FORCE) && - ((part_max_share & (~SHARED_FORCE)) > 1)) + ((part_max_share & (~SHARED_FORCE)) > 1)) { + job_ptr->details->share_res = 1; return 1; + } if (cons_res_flag) { - if (part_max_share == 1) /* partition configured Shared=NO */ - return 0; if ((job_ptr->details->share_res == 0) || - (job_ptr->details->share_res == (uint8_t) NO_VAL) || - (job_ptr->details->whole_node == 1)) + (job_ptr->details->whole_node == 1)) { + job_ptr->details->share_res = 0; return 0; + } return 1; } else { job_ptr->details->whole_node = 1; - if (part_max_share == 1) /* partition configured Shared=NO */ + if (part_max_share == 1) { /* partition configured Shared=NO */ + job_ptr->details->share_res = 0; return 0; + } /* share if the user requested it */ if (job_ptr->details->share_res == 1) return 1; + job_ptr->details->share_res = 0; + return 0; } - return 0; } /* @@ -1081,7 +1086,6 @@ _pick_best_nodes(struct node_set *node_set_ptr, int node_set_size, shared = _resolve_shared_status(job_ptr, part_ptr->max_share, cr_enabled); - job_ptr->details->share_res = shared; if (cr_enabled) job_ptr->cr_enabled = cr_enabled; /* CR enabled for this job */