From f17764718e11f4fc18b30a84f2d5b4273fe6e617 Mon Sep 17 00:00:00 2001 From: Brian Christiansen <brian@schedmd.com> Date: Wed, 31 Jan 2018 16:43:11 -0700 Subject: [PATCH] Revert "When submitting a --test-only job respect the -M option." This reverts commit 516b0d598fb22ce9f00265577dc14771cfba41ab. The NEWS file is fixed up as part of this revert. We want to keep the behavior of only checking one cluster from each federation. --- NEWS | 1 - src/api/allocate.c | 12 +----------- src/common/slurmdb_defs.c | 11 +++++++++++ 3 files changed, 12 insertions(+), 12 deletions(-) diff --git a/NEWS b/NEWS index 24817d80622..f2fd9cf1e10 100644 --- a/NEWS +++ b/NEWS @@ -62,7 +62,6 @@ documents those changes that are of interest to users and administrators. job container where if the step was canceled would also cancel the stepd erroneously. -- Make sure the slurmstepd blocks signals like SIGTERM correctly. - -- When submitting a --test-only job respect the -M option. -- SPANK - When slurm_spank_init_post_opt() fails return error correctly. -- When revoking a sibling job in the federation we want to send a start message before purging the job record to get the uid of the revoked job. diff --git a/src/api/allocate.c b/src/api/allocate.c index f28cdb84c2a..d3a45fc4f15 100644 --- a/src/api/allocate.c +++ b/src/api/allocate.c @@ -604,17 +604,7 @@ int slurm_job_will_run(job_desc_msg_t *req) cluster_name = working_cluster_rec->name; else cluster_name = slurmctld_conf.cluster_name; - - /* - * If clusters is defined then slurmdb_get_first_avail_cluster() has - * already been called and figured out the fastest cluster and a - * will_run to the cluster is all that is needed. However if specific - * clusters have been requested and the local cluster is in a federated - * then check all of the clusters in the federation for the fastest - * cluster. 
- */ - if (!req->clusters && - !slurm_load_federation(&ptr) && + if (!slurm_load_federation(&ptr) && cluster_in_federation(ptr, cluster_name)) rc = _fed_job_will_run(req, &will_run_resp, ptr); else diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index 54cf55f6fa3..fbf8dd6e0ec 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -3006,6 +3006,7 @@ extern int slurmdb_get_first_avail_cluster(job_desc_msg_t *req, ListIterator itr; List cluster_list = NULL; List ret_list = NULL; + List tried_feds = list_create(NULL); *cluster_rec = NULL; cluster_list = slurmdb_get_info_cluster(cluster_names); @@ -3030,14 +3031,24 @@ extern int slurmdb_get_first_avail_cluster(job_desc_msg_t *req, ret_list = list_create(_destroy_local_cluster_rec); itr = list_iterator_create(cluster_list); while ((working_cluster_rec = list_next(itr))) { + /* only try one cluster from each federation */ + if (working_cluster_rec->fed.id && + list_find_first(tried_feds, slurm_find_char_in_list, + working_cluster_rec->fed.name)) + continue; + if ((local_cluster = _job_will_run(req))) { list_append(ret_list, local_cluster); + if (working_cluster_rec->fed.id) + list_append(tried_feds, + working_cluster_rec->fed.name); } else { error("Problem with submit to cluster %s: %m", working_cluster_rec->name); } } list_iterator_destroy(itr); + FREE_NULL_LIST(tried_feds); /* restore working_cluster_rec in case it was already set */ if (*cluster_rec) { -- GitLab