diff --git a/NEWS b/NEWS
index defc3827f365063e1985c8da5c98ac5ae6325e05..a72f14d192c33e87451edc0f0dd4bc1524097b34 100644
--- a/NEWS
+++ b/NEWS
@@ -329,6 +329,11 @@ documents those changes that are of interest to users and administrators.
  -- Fix sinfo to display mixed nodes as allocated in '%F' output.
  -- Sview - Fix cpu and node counts for partitions.
  -- Ignore NO_VAL in SLURMDB_PURGE_* macros.
+ -- ALPS - Don't drain nodes if epilog fails. It leaves them in drain state
+    with no way to get them out.
+ -- Fix issue with task/affinity oversubscribing cpus erroneously when
+    using --ntasks-per-node.
+ -- MYSQL - Fix load of archive files.
 
 * Changes in Slurm 14.03.8
 ==========================
diff --git a/src/plugins/accounting_storage/mysql/as_mysql_archive.c b/src/plugins/accounting_storage/mysql/as_mysql_archive.c
index 63d2944a319a0564eb17f82a959a8a7c134e13bd..fa044e9d28e2c7e2ce083f52cbe501eeebf8f81b 100644
--- a/src/plugins/accounting_storage/mysql/as_mysql_archive.c
+++ b/src/plugins/accounting_storage/mysql/as_mysql_archive.c
@@ -1541,11 +1541,11 @@ _load_events(uint16_t rpc_version, Buf buffer, char *cluster_name,
 			xstrcat(insert, ", ");
 
 		xstrfmtcat(insert, format,
+			   object.period_start,
+			   object.period_end,
+			   object.node_name,
 			   object.cluster_nodes,
 			   object.cpu_count,
-			   object.node_name,
-			   object.period_end,
-			   object.period_start,
 			   object.reason,
 			   object.reason_uid,
 			   object.state);
@@ -1852,15 +1852,15 @@ static char *_load_resvs(uint16_t rpc_version, Buf buffer,
 			xstrcat(insert, ", ");
 
 		xstrfmtcat(insert, format,
+			   object.id,
 			   object.assocs,
 			   object.cpus,
 			   object.flags,
-			   object.id,
-			   object.name,
 			   object.nodes,
 			   object.node_inx,
-			   object.time_end,
-			   object.time_start);
+			   object.name,
+			   object.time_start,
+			   object.time_end);
 	}
//	END_TIMER2("step query");
//	info("resv query took %s", TIME_STR);
@@ -2018,52 +2018,52 @@ static char *_load_steps(uint16_t rpc_version, Buf buffer,
 			xstrcat(insert, ", ");
 
 		xstrfmtcat(insert, format,
-			   object.ave_cpu,
-			   object.act_cpufreq,
-			   object.consumed_energy,
-			   object.ave_disk_read,
-			   object.ave_disk_write,
-			   object.ave_pages,
-			   object.ave_rss,
-			   object.ave_vsize,
-			   object.exit_code,
-			   object.cpus,
 			   object.id,
-			   object.kill_requid,
-			   object.max_disk_read,
-			   object.max_disk_read_node,
-			   object.max_disk_read_task,
-			   object.max_disk_write,
-			   object.max_disk_write_node,
-			   object.max_disk_write_task,
-			   object.max_pages,
-			   object.max_pages_node,
-			   object.max_pages_task,
-			   object.max_rss,
-			   object.max_rss_node,
-			   object.max_rss_task,
-			   object.max_vsize,
-			   object.max_vsize_node,
-			   object.max_vsize_task,
-			   object.min_cpu,
-			   object.min_cpu_node,
-			   object.min_cpu_task,
+			   object.stepid,
+			   object.period_start,
+			   object.period_end,
+			   object.period_suspended,
 			   object.name,
 			   object.nodelist,
-			   object.nodes,
 			   object.node_inx,
-			   object.period_end,
-			   object.period_start,
-			   object.period_suspended,
-			   object.req_cpufreq,
 			   object.state,
-			   object.stepid,
-			   object.sys_sec,
-			   object.sys_usec,
+			   object.kill_requid,
+			   object.exit_code,
+			   object.nodes,
+			   object.cpus,
 			   object.tasks,
 			   object.task_dist,
 			   object.user_sec,
-			   object.user_usec);
+			   object.user_usec,
+			   object.sys_sec,
+			   object.sys_usec,
+			   object.max_vsize,
+			   object.max_vsize_task,
+			   object.max_vsize_node,
+			   object.ave_vsize,
+			   object.max_rss,
+			   object.max_rss_task,
+			   object.max_rss_node,
+			   object.ave_rss,
+			   object.max_pages,
+			   object.max_pages_task,
+			   object.max_pages_node,
+			   object.ave_pages,
+			   object.min_cpu,
+			   object.min_cpu_task,
+			   object.min_cpu_node,
+			   object.ave_cpu,
+			   object.act_cpufreq,
+			   object.consumed_energy,
+			   object.req_cpufreq,
+			   object.max_disk_read,
+			   object.max_disk_read_task,
+			   object.max_disk_read_node,
+			   object.ave_disk_read,
+			   object.max_disk_write,
+			   object.max_disk_write_task,
+			   object.max_disk_write_node,
+			   object.ave_disk_write);
 	}
 
//	END_TIMER2("step query");
@@ -2180,10 +2180,10 @@ static char *_load_suspend(uint16_t rpc_version, Buf buffer,
 			xstrcat(insert, ", ");
 
 		xstrfmtcat(insert, format,
-			   object.associd,
 			   object.id,
-			   object.period_end,
-			   object.period_start);
+			   object.associd,
+			   object.period_start,
+			   object.period_end);
 	}
 
//	END_TIMER2("suspend query");
diff --git a/src/plugins/task/affinity/dist_tasks.c b/src/plugins/task/affinity/dist_tasks.c
index 7ab578d0d9ba185150ed7c18cd33f97ec37b2c46..0bc4b3a7b95fba8c746defd6aa094fd27c088576 100644
--- a/src/plugins/task/affinity/dist_tasks.c
+++ b/src/plugins/task/affinity/dist_tasks.c
@@ -213,7 +213,7 @@ void batch_bind(batch_job_launch_msg_t *req)
 	}
 
 	str = (char *)bit_fmt_hexmask(req_map);
-	debug3("task/affinity: job %u CPU mask from slurmctld: %s",
+	debug3("task/affinity: job %u core mask from slurmctld: %s",
 	       req->job_id, str);
 	xfree(str);
 
@@ -690,7 +690,7 @@ static bitstr_t *_get_avail_map(launch_tasks_request_msg_t *req,
 	}
 
 	str = (char *)bit_fmt_hexmask(req_map);
-	debug3("task/affinity: job %u.%u CPU mask from slurmctld: %s",
+	debug3("task/affinity: job %u.%u core mask from slurmctld: %s",
 	       req->job_id, req->job_step_id, str);
 	xfree(str);
 
@@ -878,7 +878,32 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req,
 		last_taskcount = taskcount;
 		for (i = 0; i < size; i++) {
 			bool already_switched = false;
-			uint16_t bit = socket_last_pu[s] + (s * offset);
+			uint16_t bit;
+			uint16_t orig_s = s;
+
+			while (socket_last_pu[s] >= offset) {
+				/* Switch to the next socket; we have
+				 * run out here. */
+
+				/* This only happens if the slurmctld
+				 * gave us an allocation that made a
+				 * task split sockets. Or if the
+				 * entire allocation is on one socket.
+				 */
+				s = (s + 1) % hw_sockets;
+				if (orig_s == s) {
+					/* This should rarely happen,
+					 * but is here for sanity's sake.
+					 */
+					debug("allocation is full, "
+					      "oversubscribing");
+					memset(socket_last_pu, 0,
+					       hw_sockets *
+					       sizeof(*socket_last_pu));
+				}
+			}
+
+			bit = socket_last_pu[s] + (s * offset);
 
 			/* In case hardware and config differ */
 			bit %= size;
@@ -888,17 +913,6 @@ static int _task_layout_lllp_cyclic(launch_tasks_request_msg_t *req,
 			/* skip unrequested threads */
 			if (req->cpu_bind_type & CPU_BIND_ONE_THREAD_PER_CORE)
 				socket_last_pu[s] += hw_threads-1;
-			if (socket_last_pu[s] >= offset) {
-				/* Switch to the next socket we have
-				 * ran out here. */
-
-				/* This only happens if the slurmctld
-				   gave us an allocation that made a
-				   task split sockets.
-				 */
-				s = (s + 1) % hw_sockets;
-				already_switched = true;
-			}
 
 			if (!bit_test(avail_map, bit))
 				continue;
diff --git a/src/slurmctld/node_mgr.c b/src/slurmctld/node_mgr.c
index ce258b6af46cfe22d8ba7c12b7f6899c1cca68c8..6c5222162fb3913cb4a01b80c9610444b946027c 100644
--- a/src/slurmctld/node_mgr.c
+++ b/src/slurmctld/node_mgr.c
@@ -1817,6 +1817,12 @@ extern int drain_nodes ( char *nodes, char *reason, uint32_t reason_uid )
 		return ESLURM_INVALID_NODE_NAME;
 	}
 
+#ifdef HAVE_ALPS_CRAY
+	error("We cannot drain nodes on a Cray/ALPS system; "
+	      "use native Cray tools such as xtprocadmin(8).");
+	return SLURM_SUCCESS;
+#endif
+
 	if ( (host_list = hostlist_create (nodes)) == NULL) {
 		error ("hostlist_create error on %s: %m", nodes);
 		return ESLURM_INVALID_NODE_NAME;
diff --git a/testsuite/expect/globals b/testsuite/expect/globals
index 85898c154773455fa9a10ae928a1e23114dbb92c..abdd7064a0e233bf54b250d31018e21f6ec62c22 100755
--- a/testsuite/expect/globals
+++ b/testsuite/expect/globals
@@ -778,6 +778,37 @@ proc wait_for_all_jobs { job_name incr_sleep } {
 	return $matches
 }
 
+################################################################
+#
+# Proc: test_fast_schedule_2
+#
+# Purpose: Determine if we are running FastSchedule=2, which usually means
+#          we are pretending to have more resources than actually exist.
+#          This is based upon the value of FastSchedule in the slurm.conf.
+#
+# Returns 1 if FastSchedule is 2, 0 otherwise
+#
+################################################################
+proc test_fast_schedule_2 { } {
+	global scontrol number
+
+	log_user 0
+	set is_2 0
+	spawn $scontrol show config
+	expect {
+		-re "FastSchedule *= 2" {
+			set is_2 1
+			exp_continue
+		}
+		eof {
+			wait
+		}
+	}
+
+	log_user 1
+	return $is_2
+}
+
 ################################################################
 #
diff --git a/testsuite/expect/test1.91 b/testsuite/expect/test1.91
index 36c660fe474bb7f04685a205092b8f90964db106..0de1d9cbddfebe26955a0330662684fc53ae68b9 100755
--- a/testsuite/expect/test1.91
+++ b/testsuite/expect/test1.91
@@ -520,10 +520,14 @@ expect {
 if {$exit_code == 0} {
 	exec $bin_rm -f $file_prog
 	send_user "\nSUCCESS\n"
-} else {
+} elseif { [test_fast_schedule_2] } {
+	exec $bin_rm -f $file_prog
 	send_user "\nNOTE: This test can fail if the node configuration in slurm.conf\n"
 	send_user "      (sockets, cores, threads) differs from the actual configuration\n"
 	send_user "      or if using task/cgroup without task/affinity.\n"
+} else {
+	send_user "\nFAILURE\n"
+	set exit_code 1
 }
 exit $exit_code