diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 644c3cad98f73ab172d01128563178cb0043a9e4..5ef62e0fb08ce1547e44c1bee73889b69153dc53 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -127,12 +127,13 @@ Checkpoint files will be of the form "<job_id>.ckpt" for jobs and "<job_id>.<step_id>.ckpt" for job steps. .TP -\fB\-\-cluster\fR=<\fIstring\fR> -Cluster to issue commands to. Multiple cluster names may be comma separated. +\fB\-\-clusters\fR=<\fIstring\fR> +Clusters to issue commands to. Multiple cluster names may be comma separated. The job will be submitted to the one cluster providing the earliest expected -job initiation time. The default value is the current cluster. If no name is -specified, the current cluster will be used. Note the \fB\-\-export\fR option -to control environment variables exported between clusters. +job initiation time. The default value is the current cluster. A value +of '\fIall\fR' will query to run on all clusters. Note the +\fB\-\-export\fR option to control environment variables exported +between clusters. 
.TP \fB\-\-comment\fR=<\fIstring\fR> diff --git a/src/common/slurmdb_defs.c b/src/common/slurmdb_defs.c index 971056ceca0fe0b963033825897084de8bd5d0e7..b0c390b6b18d128703c0b4fee072b31ef7a3f3ae 100644 --- a/src/common/slurmdb_defs.c +++ b/src/common/slurmdb_defs.c @@ -1009,13 +1009,17 @@ extern List slurmdb_get_info_cluster(char *cluster_names) void *db_conn = NULL; ListIterator itr, itr2; int err = 0; + bool all_clusters = 0; + + if(cluster_names && !strcmp(cluster_names, "all")) + all_clusters = 1; cluster_name = slurm_get_cluster_name(); db_conn = acct_storage_g_get_connection(false, 0, 1, cluster_name); xfree(cluster_name); memset(&cluster_cond, 0, sizeof(slurmdb_cluster_cond_t)); - if(cluster_names) { + if(cluster_names && !all_clusters) { cluster_cond.cluster_list = list_create(slurm_destroy_char); slurm_addto_char_list(cluster_cond.cluster_list, cluster_names); } @@ -1026,7 +1030,7 @@ extern List slurmdb_get_info_cluster(char *cluster_names) goto end_it; } itr = list_iterator_create(temp_list); - if(!cluster_names) { + if(!cluster_names || all_clusters) { while((cluster_rec = list_next(itr))) { if(_setup_cluster_rec(cluster_rec) != SLURM_SUCCESS) { err = 1; diff --git a/src/sbatch/mult_cluster.c b/src/sbatch/mult_cluster.c index 73eb76ecb4b0f7edcc969b46f060a36c294aa391..2b203317f0cd8e60c2c47bd8b6762e01e1953856 100644 --- a/src/sbatch/mult_cluster.c +++ b/src/sbatch/mult_cluster.c @@ -97,18 +97,15 @@ local_cluster_rec_t *_job_will_run (job_desc_msg_t *req) rc = slurm_send_recv_controller_msg(&req_msg, &resp_msg); if (rc < 0) { - errno = SLURM_SOCKET_ERROR; + slurm_seterrno(SLURM_SOCKET_ERROR); return NULL; } - switch (resp_msg.msg_type) { case RESPONSE_SLURM_RC: rc = ((return_code_msg_t *) resp_msg.data)->return_code; slurm_free_return_code_msg(resp_msg.data); - if (rc < 0) { - errno = SLURM_PROTOCOL_ERROR; - return NULL; - } + if (rc) + slurm_seterrno(rc); break; case RESPONSE_JOB_WILL_RUN: if(working_cluster_rec->flags & CLUSTER_FLAG_BG) @@ -144,14 +141,12 @@ 
local_cluster_rec_t *_job_will_run (job_desc_msg_t *req) } slurm_free_will_run_response_msg(will_run_resp); - errno = 0; break; default: - errno = SLURM_UNEXPECTED_MSG_ERROR; + slurm_seterrno(SLURM_UNEXPECTED_MSG_ERROR); return NULL; break; } - return local_cluster; } @@ -187,7 +182,7 @@ extern int sbatch_set_first_avail_cluster(job_desc_msg_t *req) _destroy_local_cluster_rec); list_append(ret_list, local_cluster); } else - error("problem talking to cluster %s: %m", + error("Problem with submit to cluster %s: %m", working_cluster_rec->name); } list_iterator_destroy(itr); diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index 9ad01faae47684e6cbdc496a09b9d3b2db21e6d3..5afae22b2715c3cd0f25c5c438f156d4fe940017 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -656,7 +656,7 @@ static struct option long_options[] = { {"no-kill", no_argument, 0, 'k'}, {"licenses", required_argument, 0, 'L'}, {"distribution", required_argument, 0, 'm'}, - {"cluster", required_argument, 0, 'M'}, + {"clusters", required_argument, 0, 'M'}, {"tasks", required_argument, 0, 'n'}, {"ntasks", required_argument, 0, 'n'}, {"nodes", required_argument, 0, 'N'}, @@ -1214,7 +1214,7 @@ static void _set_options(int argc, char **argv) list_destroy(opt.clusters); if(!(opt.clusters = slurmdb_get_info_cluster(optarg))) { - error("'%s' invalid entry for --cluster", + error("'%s' invalid entry for --clusters", optarg); exit(1); } @@ -2667,7 +2667,7 @@ static void _usage(void) " [-c ncpus] [-r n] [-p partition] [--hold] [-t minutes]\n" " [-D path] [--immediate] [--no-kill] [--overcommit]\n" " [--input file] [--output file] [--error file]\n" -" [--time-min=minutes] [--licenses=names] [--cluster=cluster_name]\n" +" [--time-min=minutes] [--licenses=names] [--clusters=cluster_names]\n" " [--workdir=directory] [--share] [-m dist] [-J jobname]\n" " [--jobid=id] [--verbose] [--gid=group] [--uid=user] [-W sec] \n" " [--contiguous] [--mincpus=n] [--mem=MB] [--tmp=MB] [-C list]\n" @@ -2718,9 +2718,9 @@ static void 
_help(void) " -L, --licenses=names required license, comma separated\n" " -m, --distribution=type distribution method for processes to nodes\n" " (type = block|cyclic|arbitrary)\n" -" -M, --cluster=cluster_name cluster to issue commands to. Default is\n" -" current cluster. cluster with no name will\n" -" reset to default.\n" +" -M, --clusters=names Comma separated list of clusters to issue\n" +" commands to. Default is current cluster.\n" +" Name of 'all' will submit to run on all clusters.\n" " --mail-type=type notify on state change: BEGIN, END, FAIL or ALL\n" " --mail-user=user who to send email notification for job state\n" " changes\n"