diff --git a/NEWS b/NEWS index c94c8998db5941d337f3d493135f5e3b2a0c7925..b9bcf7fa22048d21eb375891fcbae35a831573c6 100644 --- a/NEWS +++ b/NEWS @@ -302,6 +302,10 @@ documents those changes that are of interest to users and administrators. -- Prevent negative job array index, which could cause slurmctld to crash. -- Fix issue with squeue/scontrol showing correct node_cnt when only tasks are specified. + -- Check the status of the database connection before using it. + -- ALPS - If an allocation requests -n set the BASIL -N option to the + number of tasks / number of nodes. + -- ALPS - Don't set the env var APRUN_DEFAULT_MEMORY; it is no longer needed. * Changes in Slurm 14.03.9 ========================== diff --git a/src/common/env.c b/src/common/env.c index ebf32edcc8c5228f3267b7fdcd9bb85d28c4b5f8..a3e9b0afd9aa32c209a426682ae6f97d02ba8b82 100644 --- a/src/common/env.c +++ b/src/common/env.c @@ -1015,29 +1015,10 @@ env_array_for_job(char ***dest, const resource_allocation_response_msg_t *alloc, uint32_t tmp_mem = alloc->pn_min_memory & (~MEM_PER_CPU); env_array_overwrite_fmt(dest, "SLURM_MEM_PER_CPU", "%u", tmp_mem); -#ifdef HAVE_ALPS_CRAY - env_array_overwrite_fmt(dest, "APRUN_DEFAULT_MEMORY", "%u", - tmp_mem); -#endif } else if (alloc->pn_min_memory) { uint32_t tmp_mem = alloc->pn_min_memory; -#ifdef HAVE_ALPS_CRAY - uint32_t i, max_cpus_per_node = 1; - for (i = 0; i < alloc->num_cpu_groups; i++) { - if ((i == 0) || - (max_cpus_per_node < alloc->cpus_per_node[i])) { - max_cpus_per_node = alloc->cpus_per_node[i]; - } - } - tmp_mem /= max_cpus_per_node; - env_array_overwrite_fmt(dest, "APRUN_DEFAULT_MEMORY", "%u", - tmp_mem); - env_array_overwrite_fmt(dest, "SLURM_MEM_PER_CPU", "%u", - tmp_mem); -#else env_array_overwrite_fmt(dest, "SLURM_MEM_PER_NODE", "%u", tmp_mem); -#endif } /* OBSOLETE, but needed by MPI, do not remove */ @@ -1211,28 +1192,10 @@ env_array_for_batch_job(char ***dest, const batch_job_launch_msg_t *batch, uint32_t tmp_mem = batch->pn_min_memory & 
(~MEM_PER_CPU); env_array_overwrite_fmt(dest, "SLURM_MEM_PER_CPU", "%u", tmp_mem); -#ifdef HAVE_ALPS_CRAY - env_array_overwrite_fmt(dest, "CRAY_AUTO_APRUN_OPTIONS", - "\"-m%u\"", tmp_mem); -#endif } else if (batch->pn_min_memory) { uint32_t tmp_mem = batch->pn_min_memory; -#ifdef HAVE_ALPS_CRAY - uint32_t i, max_cpus_per_node = 1; - for (i = 0; i < batch->num_cpu_groups; i++) { - if ((i == 0) || - (max_cpus_per_node < batch->cpus_per_node[i])) { - max_cpus_per_node = batch->cpus_per_node[i]; - } - } -#endif env_array_overwrite_fmt(dest, "SLURM_MEM_PER_NODE", "%u", tmp_mem); -#ifdef HAVE_ALPS_CRAY - tmp_mem /= max_cpus_per_node; - env_array_overwrite_fmt(dest, "CRAY_AUTO_APRUN_OPTIONS", - "\"-m%u\"", tmp_mem); -#endif } return SLURM_SUCCESS; diff --git a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c index 0889035f56f5472a99e5a96fd03fb04d06677b21..34d57a4fe60a7927dbefc172f3a31ac7395a9788 100644 --- a/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c +++ b/src/plugins/accounting_storage/mysql/accounting_storage_mysql.c @@ -2622,11 +2622,6 @@ extern int clusteracct_storage_p_node_down(mysql_conn_t *mysql_conn, time_t event_time, char *reason, uint32_t reason_uid) { - if (!mysql_conn->cluster_name) { - error("%s:%d no cluster name", THIS_FILE, __LINE__); - return SLURM_ERROR; - } - return as_mysql_node_down(mysql_conn, node_ptr, event_time, reason, reason_uid); } @@ -2635,11 +2630,6 @@ extern int clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn, struct node_record *node_ptr, time_t event_time) { - if (!mysql_conn->cluster_name) { - error("%s:%d no cluster name", THIS_FILE, __LINE__); - return SLURM_ERROR; - } - return as_mysql_node_up(mysql_conn, node_ptr, event_time); } @@ -2649,11 +2639,6 @@ extern int clusteracct_storage_p_node_up(mysql_conn_t *mysql_conn, extern int clusteracct_storage_p_register_ctld(mysql_conn_t *mysql_conn, uint16_t port) { - if 
(!mysql_conn->cluster_name) { - error("%s:%d no cluster name", THIS_FILE, __LINE__); - return SLURM_ERROR; - } - return as_mysql_register_ctld( mysql_conn, mysql_conn->cluster_name, port); } @@ -2666,6 +2651,9 @@ extern uint16_t clusteracct_storage_p_register_disconn_ctld( MYSQL_RES *result = NULL; MYSQL_ROW row; + if (check_connection(mysql_conn) != SLURM_SUCCESS) + return ESLURM_DB_CONNECTION; + if (!mysql_conn->cluster_name) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return control_port; @@ -2713,6 +2701,9 @@ extern uint16_t clusteracct_storage_p_register_disconn_ctld( extern int clusteracct_storage_p_fini_ctld(mysql_conn_t *mysql_conn, slurmdb_cluster_rec_t *cluster_rec) { + if (check_connection(mysql_conn) != SLURM_SUCCESS) + return ESLURM_DB_CONNECTION; + if (!cluster_rec || (!mysql_conn->cluster_name && !cluster_rec->name)) { error("%s:%d no cluster name", THIS_FILE, __LINE__); return SLURM_ERROR; @@ -2729,11 +2720,6 @@ extern int clusteracct_storage_p_cluster_cpus(mysql_conn_t *mysql_conn, uint32_t cpus, time_t event_time) { - if (!mysql_conn->cluster_name) { - error("%s:%d no cluster name", THIS_FILE, __LINE__); - return SLURM_ERROR; - } - return as_mysql_cluster_cpus(mysql_conn, cluster_nodes, cpus, event_time); } diff --git a/src/plugins/accounting_storage/mysql/as_mysql_cluster.c b/src/plugins/accounting_storage/mysql/as_mysql_cluster.c index f480ed32b93bf8fb5e2873341a0005c11eab25b4..664b6fdad3ab215b6af1aa572a22c08d9f5f6c08 100644 --- a/src/plugins/accounting_storage/mysql/as_mysql_cluster.c +++ b/src/plugins/accounting_storage/mysql/as_mysql_cluster.c @@ -1046,6 +1046,11 @@ extern int as_mysql_node_down(mysql_conn_t *mysql_conn, if (check_connection(mysql_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; + if (!mysql_conn->cluster_name) { + error("%s:%d no cluster name", THIS_FILE, __LINE__); + return SLURM_ERROR; + } + if (!node_ptr) { error("No node_ptr given!"); return SLURM_ERROR; @@ -1104,6 +1109,11 @@ extern int 
as_mysql_node_up(mysql_conn_t *mysql_conn, if (check_connection(mysql_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; + if (!mysql_conn->cluster_name) { + error("%s:%d no cluster name", THIS_FILE, __LINE__); + return SLURM_ERROR; + } + query = xstrdup_printf( "update \"%s_%s\" set time_end=%ld where " "time_end=0 and node_name='%s';", @@ -1133,6 +1143,11 @@ extern int as_mysql_register_ctld(mysql_conn_t *mysql_conn, if (check_connection(mysql_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; + if (!mysql_conn->cluster_name) { + error("%s:%d no cluster name", THIS_FILE, __LINE__); + return SLURM_ERROR; + } + if (!mysql_conn->cluster_name) mysql_conn->cluster_name = xstrdup(cluster); @@ -1254,6 +1269,11 @@ extern int as_mysql_cluster_cpus(mysql_conn_t *mysql_conn, if (check_connection(mysql_conn) != SLURM_SUCCESS) return ESLURM_DB_CONNECTION; + if (!mysql_conn->cluster_name) { + error("%s:%d no cluster name", THIS_FILE, __LINE__); + return SLURM_ERROR; + } + /* Record the processor count */ query = xstrdup_printf( "select cpu_count, cluster_nodes from \"%s_%s\" where " diff --git a/src/plugins/launch/aprun/launch_aprun.c b/src/plugins/launch/aprun/launch_aprun.c index fc49587529a4de80b32966f83e105573f2c2b6ba..5c3a12b5ccd34eab3024d5d9c7972864de1e8a8b 100644 --- a/src/plugins/launch/aprun/launch_aprun.c +++ b/src/plugins/launch/aprun/launch_aprun.c @@ -692,20 +692,6 @@ extern int launch_p_create_job_step(srun_job_t *job, bool use_all_cpus, void (*signal_function)(int), sig_atomic_t *destroy_job) { - char value[32]; - - /* If srun is call directly this wasn't figured out until - later if the user used --mem. The problem here is this - will not work with --launch_cmd since that doesn't go get - an actual allocation (which is where pn_min_memory is decided). 
- */ - if ((opt.mem_per_cpu == NO_VAL) - && global_resp && (global_resp->pn_min_memory & MEM_PER_CPU)) { - snprintf(value, sizeof(value), "%u", - global_resp->pn_min_memory & (~MEM_PER_CPU)); - setenv("APRUN_DEFAULT_MEMORY", value, 1); - } - if (opt.launch_cmd) { int i = 0; char *cmd_line = NULL; diff --git a/src/plugins/select/alps/basil_interface.c b/src/plugins/select/alps/basil_interface.c index 81396588a8c2905128cefd365311c6dc9b39f7c2..b257e6aaaa0ce3ac7ae894ab378dc0de4311b3a0 100644 --- a/src/plugins/select/alps/basil_interface.c +++ b/src/plugins/select/alps/basil_interface.c @@ -748,7 +748,13 @@ extern int do_basil_reserve(struct job_record *job_ptr) if (cray_conf->sub_alloc) { mppdepth = MAX(1, job_ptr->details->cpus_per_task); - mppnppn = job_ptr->details->ntasks_per_node; + if (!job_ptr->details->ntasks_per_node + && job_ptr->details->num_tasks) { + mppnppn = (job_ptr->details->num_tasks + + job_ptr->job_resrcs->nhosts - 1) / + job_ptr->job_resrcs->nhosts; + } else + mppnppn = job_ptr->details->ntasks_per_node; } else { /* always be 1 */ mppdepth = 1;