diff --git a/doc/man/man1/sbatch.1 b/doc/man/man1/sbatch.1 index 2844d35184c92cf77a934911ade01d7de6a7baca..8a87393d7d9f4a016d03dbc3f62f03d6c8052497 100644 --- a/doc/man/man1/sbatch.1 +++ b/doc/man/man1/sbatch.1 @@ -208,10 +208,12 @@ Checkpoint files will be of the form "<job_id>.ckpt" for jobs and "<job_id>.<step_id>.ckpt" for job steps. .TP -\fB\-\-cluster\-constraint\fR=<\fIlist\fR> +\fB\-\-cluster\-constraint\fR=[!]<\fIlist\fR> Specifies features that a federated cluster must have to have a sibling job submitted to it. Slurm will attempt to submit a sibling job to a cluster if it -has at least one of the specified features. +has at least one of the specified features. If the "!" option is included, Slurm +will attempt to submit a sibling job to a cluster that has none of the specified +features. .TP \fB\-\-comment\fR=<\fIstring\fR> diff --git a/doc/man/man5/slurm.conf.5 b/doc/man/man5/slurm.conf.5 index 435fec23a8608af26c6aa9ce1099065df0e69a73..cd3aa68c922fc4292127f143a5b940e676e6ff66 100644 --- a/doc/man/man5/slurm.conf.5 +++ b/doc/man/man5/slurm.conf.5 @@ -875,7 +875,7 @@ Used to define federation options. Multiple options may be comma separated. .TP \fBfed_display\fR If set, then the client status commands (e.g. squeue, sinfo, sprio, etc.) will -display infomation in a federated view by default. This option is functionaly +display information in a federated view by default. This option is functionally equivalent to using the \-\-federation options on each command. Use the client's \-\-local option to override the federated view and get a local view of the given cluster. 
diff --git a/src/common/slurm_protocol_api.c b/src/common/slurm_protocol_api.c index 90a0d4cac2a83fec6975af26321ffa98d99a5157..aba35bba2dd47ab383916d13a4da82a8d177a1a0 100644 --- a/src/common/slurm_protocol_api.c +++ b/src/common/slurm_protocol_api.c @@ -4349,6 +4349,7 @@ extern int slurm_send_recv_controller_msg(slurm_msg_t * request_msg, uint16_t slurmctld_timeout; slurm_addr_t ctrl_addr; static bool use_backup = false; + slurmdb_cluster_rec_t *save_comm_cluster_rec = comm_cluster_rec; /* Just in case the caller didn't initialize his slurm_msg_t, and * since we KNOW that we are only sending to one node (the controller), @@ -4358,6 +4359,8 @@ extern int slurm_send_recv_controller_msg(slurm_msg_t * request_msg, request_msg->ret_list = NULL; request_msg->forward_struct = NULL; +tryagain: + retry = 1; if (comm_cluster_rec) request_msg->flags |= SLURM_GLOBAL_AUTH_KEY; @@ -4409,6 +4412,24 @@ extern int slurm_send_recv_controller_msg(slurm_msg_t * request_msg, break; } + if (!rc && (response_msg->msg_type == RESPONSE_SLURM_REROUTE_MSG)) { + reroute_msg_t *rr_msg = (reroute_msg_t *)response_msg->data; + + /* Don't expect multiple hops but in the case it does + * happen, free the previous rr cluster_rec. 
*/ + if (comm_cluster_rec && + (comm_cluster_rec != save_comm_cluster_rec)) + slurmdb_destroy_cluster_rec(comm_cluster_rec); + + comm_cluster_rec = rr_msg->working_cluster_rec; + slurmdb_setup_cluster_rec(comm_cluster_rec); + rr_msg->working_cluster_rec = NULL; + goto tryagain; + } + + if (comm_cluster_rec != save_comm_cluster_rec) + slurmdb_destroy_cluster_rec(comm_cluster_rec); + cleanup: if (rc != 0) _remap_slurmctld_errno(); @@ -4663,24 +4684,8 @@ extern int slurm_send_recv_controller_rc_msg(slurm_msg_t *req, int *rc, { int ret_c; slurm_msg_t resp; - slurmdb_cluster_rec_t *save_comm_cluster_rec = comm_cluster_rec; -tryagain: if (!slurm_send_recv_controller_msg(req, &resp, comm_cluster_rec)) { - if (resp.msg_type == RESPONSE_SLURM_REROUTE_MSG) { - reroute_msg_t *rr_msg = (reroute_msg_t *)resp.data; - - /* Don't expect mutliple hops but in the case it does - * happen, free the previous rr cluster_rec. */ - if (comm_cluster_rec && - (comm_cluster_rec != save_comm_cluster_rec)) - slurmdb_destroy_cluster_rec(comm_cluster_rec); - - comm_cluster_rec = rr_msg->working_cluster_rec; - slurmdb_setup_cluster_rec(comm_cluster_rec); - rr_msg->working_cluster_rec = NULL; - goto tryagain; - } *rc = slurm_get_return_code(resp.msg_type, resp.data); slurm_free_msg_data(resp.msg_type, resp.data); ret_c = 0; @@ -4688,9 +4693,6 @@ tryagain: ret_c = -1; } - if (comm_cluster_rec != save_comm_cluster_rec) - slurmdb_destroy_cluster_rec(comm_cluster_rec); - return ret_c; } diff --git a/src/sbatch/opt.c b/src/sbatch/opt.c index c16126cf0424744a66713e97cde87d784a6a728f..61d3f81c66459f2c1a4c60af96f6f6057c1f60ec 100644 --- a/src/sbatch/opt.c +++ b/src/sbatch/opt.c @@ -3534,7 +3534,7 @@ static void _help(void) "\n" "Constraint options:\n" -" --cluster-constraint=list specify a list of cluster constraints\n" +" --cluster-constraint=[!]list specify a list of cluster constraints\n" " --contiguous demand a contiguous range of nodes\n" " -C, --constraint=list specify a list of constraints\n" " -F, 
--nodefile=filename request a specific list of hosts\n" diff --git a/src/slurmctld/fed_mgr.c b/src/slurmctld/fed_mgr.c index 7b4f736f65e4297ad2f2807505d8f9e0dd785e7c..c9ff92d1249a5c93d3c6289c97351a825b2baf7b 100644 --- a/src/slurmctld/fed_mgr.c +++ b/src/slurmctld/fed_mgr.c @@ -559,11 +559,31 @@ static void _remove_self_from_federation() _leave_federation(); } +static int _foreach_job_completed(void *object, void *arg) +{ + struct job_record *job_ptr = (struct job_record *)object; + + if (IS_JOB_COMPLETED(job_ptr)) + return SLURM_SUCCESS; + + return SLURM_ERROR; +} + +static int _foreach_job_no_requeue(void *object, void *arg) +{ + struct job_record *job_ptr = (struct job_record *)object; + + if (job_ptr->details) + job_ptr->details->requeue = 0; + + return SLURM_SUCCESS; +} + static void *_job_watch_thread(void *arg) { struct timespec ts = {0, 0}; - slurmctld_lock_t job_read_fed_write_lock = { - NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK }; + slurmctld_lock_t job_write_fed_write_lock = { + NO_LOCK, WRITE_LOCK, NO_LOCK, NO_LOCK, WRITE_LOCK }; #if HAVE_SYS_PRCTL_H if (prctl(PR_SET_NAME, "fed_jobw", NULL, NULL, NULL) < 0) { @@ -574,11 +594,11 @@ static void *_job_watch_thread(void *arg) info("%s: started job_watch thread", __func__); while (!slurmctld_config.shutdown_time && !stop_job_watch_thread) { - int job_count = 0; + int tot_jobs, comp_jobs; slurm_mutex_lock(&job_watch_mutex); if (!slurmctld_config.shutdown_time && !stop_job_watch_thread) { - ts.tv_sec = time(NULL) + 30; + ts.tv_sec = time(NULL) + 5; pthread_cond_timedwait(&job_watch_cond, &job_watch_mutex, &ts); } @@ -587,32 +607,41 @@ static void *_job_watch_thread(void *arg) if (slurmctld_config.shutdown_time || stop_job_watch_thread) break; - lock_slurmctld(job_read_fed_write_lock); + lock_slurmctld(job_write_fed_write_lock); if (!fed_mgr_cluster_rec) { /* not part of the federation anymore */ - unlock_slurmctld(job_read_fed_write_lock); + unlock_slurmctld(job_write_fed_write_lock); break; } - if 
((job_count = list_count(job_list))) { - if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) - info("%s: %d remaining jobs before being removed from the federation", - __func__, job_count); + if ((tot_jobs = list_count(job_list)) != + (comp_jobs = list_for_each(job_list, _foreach_job_completed, + NULL))) { + if (slurmctld_conf.debug_flags & DEBUG_FLAG_FEDR) { + /* list_for_each negates the count if failed. */ + int remaining_jobs = tot_jobs + comp_jobs + 1; + info("%s: at least %d remaining jobs before being drained and/or removed from the federation", + __func__, remaining_jobs); + } } else { if (fed_mgr_cluster_rec->fed.state & - CLUSTER_FED_STATE_REMOVE) + CLUSTER_FED_STATE_REMOVE) { + /* prevent federated jobs from being requeued */ + list_for_each(job_list, _foreach_job_no_requeue, + NULL); _remove_self_from_federation(); - else if (fed_mgr_cluster_rec->fed.state & - CLUSTER_FED_STATE_DRAIN) + } else if (fed_mgr_cluster_rec->fed.state & + CLUSTER_FED_STATE_DRAIN) { _mark_self_as_drained(); + } - unlock_slurmctld(job_read_fed_write_lock); + unlock_slurmctld(job_write_fed_write_lock); break; } - unlock_slurmctld(job_read_fed_write_lock); + unlock_slurmctld(job_write_fed_write_lock); } job_watch_thread_id = 0; @@ -850,7 +879,7 @@ static int _persist_update_job(slurmdb_cluster_rec_t *conn, uint32_t job_id, slurm_msg_t_init(&tmp_msg); tmp_msg.msg_type = REQUEST_UPDATE_JOB; tmp_msg.data = data; - tmp_msg.protocol_version = SLURM_PROTOCOL_VERSION; + tmp_msg.protocol_version = conn->rpc_version; buffer = init_buf(BUF_SIZE); pack_msg(&tmp_msg, buffer); @@ -864,8 +893,9 @@ static int _persist_update_job(slurmdb_cluster_rec_t *conn, uint32_t job_id, sib_msg.job_id = job_id; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = tmp_msg.protocol_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, 0, false); @@ -889,8 +919,9 @@ static int 
_persist_update_job_resp(slurmdb_cluster_rec_t *conn, sib_msg.return_code = return_code; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = conn->rpc_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, job_id, false); @@ -922,8 +953,9 @@ static int _persist_fed_job_revoke(slurmdb_cluster_rec_t *conn, uint32_t job_id, sib_msg.return_code = return_code; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = conn->rpc_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, job_id, false); @@ -945,8 +977,9 @@ static int _persist_fed_job_response(slurmdb_cluster_rec_t *conn, uint32_t job_i sib_msg.return_code = return_code; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = conn->rpc_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, job_id, false); @@ -983,7 +1016,8 @@ static int _persist_fed_job_lock(slurmdb_cluster_rec_t *conn, uint32_t job_id, else req_msg.msg_type = REQUEST_SIB_JOB_UNLOCK; - req_msg.data = &sib_msg; + req_msg.protocol_version = conn->rpc_version; + req_msg.data = &sib_msg; if (_send_recv_msg(conn, &req_msg, &resp_msg, false)) { rc = SLURM_PROTOCOL_ERROR; @@ -1038,8 +1072,9 @@ static int _persist_fed_job_start(slurmdb_cluster_rec_t *conn, sib_msg.cluster_id = cluster_id; sib_msg.start_time = start_time; - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = conn->rpc_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, job_id, false); @@ -1075,7 +1110,7 @@ static int _persist_fed_job_cancel(slurmdb_cluster_rec_t *conn, uint32_t job_id, slurm_msg_t_init(&tmp_msg); tmp_msg.msg_type = 
REQUEST_CANCEL_JOB_STEP; tmp_msg.data = &kill_req; - tmp_msg.protocol_version = SLURM_PROTOCOL_VERSION; + tmp_msg.protocol_version = conn->rpc_version; buffer = init_buf(BUF_SIZE); pack_msg(&tmp_msg, buffer); @@ -1088,8 +1123,9 @@ static int _persist_fed_job_cancel(slurmdb_cluster_rec_t *conn, uint32_t job_id, sib_msg.req_uid = uid; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = tmp_msg.protocol_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, job_id, false); @@ -1124,7 +1160,7 @@ static int _persist_fed_job_requeue(slurmdb_cluster_rec_t *conn, slurm_msg_t_init(&tmp_msg); tmp_msg.msg_type = REQUEST_JOB_REQUEUE; tmp_msg.data = &requeue_req; - tmp_msg.protocol_version = SLURM_PROTOCOL_VERSION; + tmp_msg.protocol_version = conn->rpc_version; buffer = init_buf(BUF_SIZE); pack_msg(&tmp_msg, buffer); @@ -1137,8 +1173,9 @@ static int _persist_fed_job_requeue(slurmdb_cluster_rec_t *conn, sib_msg.data_version = tmp_msg.protocol_version; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = tmp_msg.protocol_version; + req_msg.data = &sib_msg; rc = _queue_rpc(conn, &req_msg, job_id, false); @@ -1749,8 +1786,9 @@ extern int _handle_fed_send_job_sync(fed_job_update_info_t *job_update_info) sib_msg.start_time = sync_time; slurm_msg_t_init(&req_msg); - req_msg.msg_type = REQUEST_SIB_MSG; - req_msg.data = &sib_msg; + req_msg.msg_type = REQUEST_SIB_MSG; + req_msg.protocol_version = job_msg.protocol_version; + req_msg.data = &sib_msg; rc = _queue_rpc(sibling, &req_msg, 0, false); @@ -2731,6 +2769,8 @@ static int _submit_sibling_jobs(job_desc_msg_t *job_desc, slurm_msg_t *msg, sib_msg_t sib_msg = {0}; slurmdb_cluster_rec_t *sibling = NULL; slurm_msg_t req_msg; + uint16_t last_rpc_version = NO_VAL16; + Buf buffer = NULL; xassert(job_desc); 
xassert(msg); @@ -2767,6 +2807,25 @@ static int _submit_sibling_jobs(job_desc_msg_t *job_desc, slurm_msg_t *msg, else sib_msg.sib_msg_type = FED_JOB_SUBMIT_BATCH; + /* Pack message buffer according to sibling's rpc version. A + * submission from a client will already have a buffer with the + * packed job_desc from the client. If this controller is + * submitting new sibling jobs then the buffer needs to be + * packed according to each sibling's rpc_version. */ + if (!msg->buffer && + (last_rpc_version != sibling->rpc_version)) { + free_buf(buffer); + msg->protocol_version = sibling->rpc_version; + buffer = init_buf(BUF_SIZE); + pack_msg(msg, buffer); + sib_msg.data_buffer = buffer; + sib_msg.data_version = msg->protocol_version; + + last_rpc_version = sibling->rpc_version; + } + + req_msg.protocol_version = sibling->rpc_version; + + if (!(rc = _queue_rpc(sibling, &req_msg, 0, false))) job_desc->fed_siblings_active |= FED_SIBLING_BIT(sibling->fed.id); @@ -2774,6 +2833,8 @@ } list_iterator_destroy(sib_itr); + free_buf(buffer); + return ret_rc; } @@ -2789,7 +2850,6 @@ static int _prepare_submit_siblings(struct job_record *job_ptr, int rc = SLURM_SUCCESS; uint32_t origin_id; job_desc_msg_t *job_desc; - Buf buffer; slurm_msg_t msg; xassert(job_ptr); @@ -2804,15 +2864,11 @@ if (!(job_desc = copy_job_record_to_job_desc(job_ptr))) return SLURM_ERROR; - /* have to pack job_desc into a buffer */ + /* Have to pack job_desc into a buffer. _submit_sibling_jobs will pack + * the job_desc according to each sibling's rpc_version. 
*/ slurm_msg_t_init(&msg); msg.msg_type = REQUEST_RESOURCE_ALLOCATION; msg.data = job_desc; - msg.protocol_version = SLURM_PROTOCOL_VERSION; - - buffer = init_buf(BUF_SIZE); - pack_msg(&msg, buffer); - msg.buffer = buffer; if (_submit_sibling_jobs(job_desc, &msg, false, dest_sibs)) error("Failed to submit fed job to siblings"); @@ -2827,7 +2883,6 @@ static int _prepare_submit_siblings(struct job_record *job_ptr, job_ptr->fed_details->siblings_active |= job_desc->fed_siblings_active; update_job_fed_details(job_ptr); - free_buf(buffer); /* free the environment since all strings are stored in one * xmalloced buffer */ if (job_desc->environment) { @@ -3013,6 +3068,7 @@ static int _validate_cluster_features(char *spec_features, uint64_t *cluster_bitmap) { int rc = SLURM_SUCCESS; + bool negative_logic = false; uint64_t feature_sibs = 0; char *feature = NULL; slurmdb_cluster_rec_t *sib; @@ -3030,13 +3086,27 @@ static int _validate_cluster_features(char *spec_features, feature_itr = list_iterator_create(req_features); sib_itr = list_iterator_create(fed_mgr_fed_rec->cluster_list); + feature = list_peek(req_features); + if (feature && feature[0] == '!') { + feature_sibs = _get_all_sibling_bits(); + negative_logic = true; + } + while ((feature = list_next(feature_itr))) { + if (negative_logic && feature[0] == '!') + feature++; bool found = false; while ((sib = list_next(sib_itr))) { if (sib->fed.feature_list && list_find_first(sib->fed.feature_list, slurm_find_char_in_list, feature)) { - feature_sibs |= FED_SIBLING_BIT(sib->fed.id); + if (negative_logic) { + feature_sibs &= + ~FED_SIBLING_BIT(sib->fed.id); + } else { + feature_sibs |= + FED_SIBLING_BIT(sib->fed.id); + } found = true; } } @@ -3047,6 +3117,12 @@ static int _validate_cluster_features(char *spec_features, rc = SLURM_ERROR; goto end_features; } + if (negative_logic && !feature_sibs) { + error("eliminated all viable clusters with constraint '%s'", + feature); + rc = SLURM_ERROR; + goto end_features; + } 
list_iterator_reset(sib_itr); } end_features: @@ -3060,15 +3136,7 @@ end_features: return rc; } -/* Determine how to submit a federated a job. - * - * First tries to find a cluster that can start the job now. If a cluster can - * start the job now, then a sibling job is submitted to that cluster. If no - * cluster can start the job now, then siblings jobs are submitted to each - * sibling. - * - * Does its own locking (job and fed). Doesn't have a job write lock when - * communicating with siblings to prevent blocking on sibling communications. +/* submit a federated job. * * IN msg - msg that contains packed job_desc msg to send to siblings. * IN job_desc - original job_desc msg. diff --git a/src/slurmctld/proc_req.c b/src/slurmctld/proc_req.c index 829c568c0e4000080f22b7162bb91f2f0a187d63..c26b0df2b249f5bdc7c088d8728253bdab7c58dc 100644 --- a/src/slurmctld/proc_req.c +++ b/src/slurmctld/proc_req.c @@ -118,6 +118,8 @@ static int _make_step_cred(struct step_record *step_rec, slurm_cred_t **slurm_cred, uint16_t protocol_version); inline static void _proc_multi_msg(uint32_t rpc_uid, slurm_msg_t *msg); +static int _route_msg_to_origin(slurm_msg_t *msg, char *job_id_str, + uint32_t job_id, uid_t uid); static void _throttle_fini(int *active_rpc_cnt); static void _throttle_start(int *active_rpc_cnt); @@ -3402,42 +3404,18 @@ static void _slurm_rpc_update_job(slurm_msg_t * msg) DEF_TIMERS; job_desc_msg_t *job_desc_msg = (job_desc_msg_t *) msg->data; /* Locks: Read config, write job, write node, read partition, read fed*/ + slurmctld_lock_t fed_read_lock = { + NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; slurmctld_lock_t job_write_lock = { READ_LOCK, WRITE_LOCK, WRITE_LOCK, READ_LOCK, READ_LOCK }; - slurmctld_lock_t fed_read_lock = - {NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurmctld_config.auth_info); - /* route msg to origin cluster if a federated job */ lock_slurmctld(fed_read_lock); - if (!error_code && 
!msg->conn && fed_mgr_fed_rec) { - /* Don't send reroute if coming from a federated cluster (aka - * has a msg->conn). */ - uint32_t job_id, origin_id; - - if (job_desc_msg->job_id_str) - job_id = strtol(job_desc_msg->job_id_str, NULL, 10); - else - job_id = job_desc_msg->job_id; - origin_id = fed_mgr_get_cluster_id(job_id); - - if (origin_id && (origin_id != fed_mgr_cluster_rec->fed.id)) { - slurmdb_cluster_rec_t *dst = - fed_mgr_get_cluster_by_id(origin_id); - if (!dst) { - error("couldn't find cluster by cluster id %d", - origin_id); - slurm_send_rc_msg(msg, SLURM_ERROR); - } else { - slurm_send_reroute_msg(msg, dst); - info("%s: REQUEST_UPDATE_JOB job %d uid %d routed to %s", - __func__, job_id, uid, dst->name); - } - - unlock_slurmctld(fed_read_lock); - return; - } + if (!_route_msg_to_origin(msg, job_desc_msg->job_id_str, + job_desc_msg->job_id, uid)) { + unlock_slurmctld(fed_read_lock); + return; } unlock_slurmctld(fed_read_lock); @@ -4416,11 +4394,21 @@ inline static void _slurm_rpc_requeue(slurm_msg_t * msg) DEF_TIMERS; requeue_msg_t *req_ptr = (requeue_msg_t *)msg->data; /* Locks: write job and node */ + slurmctld_lock_t fed_read_lock = { + NO_LOCK, NO_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; slurmctld_lock_t job_write_lock = { NO_LOCK, WRITE_LOCK, WRITE_LOCK, NO_LOCK, READ_LOCK }; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, slurmctld_config.auth_info); + lock_slurmctld(fed_read_lock); + if (!_route_msg_to_origin(msg, req_ptr->job_id_str, req_ptr->job_id, + uid)) { + unlock_slurmctld(fed_read_lock); + return; + } + unlock_slurmctld(fed_read_lock); + START_TIMER; info("%s: Processing RPC: REQUEST_JOB_REQUEUE from uid=%d", __func__, @@ -4783,7 +4771,7 @@ inline static void _slurm_rpc_job_notify(slurm_msg_t * msg) int error_code; /* Locks: read job */ slurmctld_lock_t job_read_lock = { - NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, NO_LOCK }; + NO_LOCK, READ_LOCK, NO_LOCK, NO_LOCK, READ_LOCK }; uid_t uid = g_slurm_auth_get_uid(msg->auth_cred, 
slurmctld_config.auth_info); job_notify_msg_t * notify_msg = (job_notify_msg_t *) msg->data; @@ -4796,9 +4784,48 @@ inline static void _slurm_rpc_job_notify(slurm_msg_t * msg) /* do RPC call */ lock_slurmctld(job_read_lock); job_ptr = find_job_record(notify_msg->job_id); + + /* If job is found on the cluster, it could be pending, the origin + * cluster, or running on the sibling cluster. If it's not there then + * route it to the origin. */ + if (!job_ptr && + !_route_msg_to_origin(msg, NULL, notify_msg->job_id, uid)) { + unlock_slurmctld(job_read_lock); + return; + } + if (!job_ptr) error_code = ESLURM_INVALID_JOB_ID; - else if ((job_ptr->user_id == uid) || validate_slurm_user(uid)) + else if (job_ptr->batch_flag && + fed_mgr_cluster_rec && job_ptr->fed_details && + fed_mgr_is_origin_job(job_ptr) && + IS_JOB_REVOKED(job_ptr) && + job_ptr->fed_details->cluster_lock && + (job_ptr->fed_details->cluster_lock != + fed_mgr_cluster_rec->fed.id)) { + + /* Route to the cluster that is running the batch job. srun jobs + * don't need to be routed to the running cluster since the + * origin cluster knows how to contact the listening srun. */ + slurmdb_cluster_rec_t *dst = + fed_mgr_get_cluster_by_id( + job_ptr->fed_details->cluster_lock); + if (dst) { + slurm_send_reroute_msg(msg, dst); + info("%s: %s job %d uid %d routed to %s", + __func__, rpc_num2string(msg->msg_type), + job_ptr->job_id, uid, dst->name); + + unlock_slurmctld(job_read_lock); + END_TIMER2("_slurm_rpc_job_notify"); + return; + } + + error("couldn't find cluster by cluster id %d", + job_ptr->fed_details->cluster_lock); + error_code = ESLURM_INVALID_CLUSTER_NAME; + + } else if ((job_ptr->user_id == uid) || validate_slurm_user(uid)) error_code = srun_user_message(job_ptr, notify_msg->message); else { error_code = ESLURM_USER_ID_MISSING; @@ -5982,3 +6009,44 @@ static void _proc_multi_msg(uint32_t rpc_uid, slurm_msg_t *msg) free_buf(resp_buf); return; } + +/* Route msg to federated job's origin. 
+ * RET returns SLURM_SUCCESS if the msg was routed. + */ +static int _route_msg_to_origin(slurm_msg_t *msg, char *src_job_id_str, + uint32_t src_job_id, uid_t uid) +{ + xassert(msg); + + /* route msg to origin cluster if a federated job */ + if (!msg->conn && fed_mgr_fed_rec) { + /* Don't send reroute if coming from a federated cluster (aka + * has a msg->conn). */ + uint32_t job_id, origin_id; + + if (src_job_id_str) + job_id = strtol(src_job_id_str, NULL, 10); + else + job_id = src_job_id; + origin_id = fed_mgr_get_cluster_id(job_id); + + if (origin_id && (origin_id != fed_mgr_cluster_rec->fed.id)) { + slurmdb_cluster_rec_t *dst = + fed_mgr_get_cluster_by_id(origin_id); + if (!dst) { + error("couldn't find cluster by cluster id %d", + origin_id); + slurm_send_rc_msg(msg, SLURM_ERROR); + } else { + slurm_send_reroute_msg(msg, dst); + info("%s: %s job %d uid %d routed to %s", + __func__, rpc_num2string(msg->msg_type), + job_id, uid, dst->name); + } + + return SLURM_SUCCESS; + } + } + + return SLURM_ERROR; +} diff --git a/testsuite/expect/Makefile.am b/testsuite/expect/Makefile.am index 1763770359ba428d6e35010d5e10f08b8ecef39a..b1e32933dfbee6ed4c9f73f1c55838373c9594f6 100644 --- a/testsuite/expect/Makefile.am +++ b/testsuite/expect/Makefile.am @@ -616,7 +616,8 @@ EXTRA_DIST = \ test37.9 \ test37.10 \ test37.11 \ - test37.12 + test37.12 \ + test37.13 distclean-local: rm -rf *error *output diff --git a/testsuite/expect/Makefile.in b/testsuite/expect/Makefile.in index 724dbf156c21d35793fd090de5ef3dab91a7fe16..dac3579ee5208bbeabcf2d265e2d9bc91f1c0b12 100644 --- a/testsuite/expect/Makefile.in +++ b/testsuite/expect/Makefile.in @@ -1035,7 +1035,8 @@ EXTRA_DIST = \ test37.9 \ test37.10 \ test37.11 \ - test37.12 + test37.12 \ + test37.13 all: all-am diff --git a/testsuite/expect/README b/testsuite/expect/README index 4cf865c83595bf2e3867fcf576ebe4c078be47c2..65e7a6be01f5ac8ddd230aa82c5962020809708e 100644 --- a/testsuite/expect/README +++ b/testsuite/expect/README @@ 
-809,3 +809,4 @@ test37.9 Verify federated scontrol, squeue, sinfo and sprio output test37.10 Federated job cancellations test37.11 Federated sacct output test37.12 Federated sbatch|srun --test-only +test37.13 Validate federated arrays diff --git a/testsuite/expect/test37.13 b/testsuite/expect/test37.13 new file mode 100755 index 0000000000000000000000000000000000000000..4aaf84ac70e163e4a99e3f8b10f555b9167be886 --- /dev/null +++ b/testsuite/expect/test37.13 @@ -0,0 +1,350 @@ +#!/usr/bin/expect +############################################################################ +# Purpose: Test federated arrays +# +# Reqs: 1. Using slurmdbd accounting storage type and is up +# 2. fed_slurm_base is defined in globals.local - set to directory that +# has access to each federation configure (fedc1, fedc2, fedc3). +# Eg. +# fedr/slurm/ (src) +# fedr/fed1/bin +# fedr/fed1/sbin +# fedr/fed1/etc +# fedr/fed1/... +# fedr/fed2/... +# fedr/fed3/... +# 3. controllers are up and running. +# +# Output: "TEST: #.#" followed by "SUCCESS" if test was successful, OR +# "FAILURE: ..." otherwise with an explanation of the failure, OR +# anything else indicates a failure mode that must be investigated. +############################################################################ +# Copyright (C) 2017 SchedMD LLC. +# Written by Isaac Hartung <ihartung@schedmd.com> +# +# This file is part of SLURM, a resource management program. +# For details, see <https://slurm.schedmd.com/>. +# Please also read the included file: DISCLAIMER. +# +# SLURM is free software; you can redistribute it and/or modify it under +# the terms of the GNU General Public License as published by the Free +# Software Foundation; either version 2 of the License, or (at your option) +# any later version. +# +# SLURM is distributed in the hope that it will be useful, but WITHOUT ANY +# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +# FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for more +# details. +# +# You should have received a copy of the GNU General Public License along +# with SLURM; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +############################################################################ + +source ./globals +source ./globals_accounting +source ./globals_federation + +set test_id "37.13" +set exit_code 0 +set fed_name "feda" +set user_name "" +set srun_job_cnt 0 +set my_sbatch "${fed_slurm_base}/$fedc1/bin/sbatch" +set my_squeue "${fed_slurm_base}/$fedc1/bin/squeue" + +set eol "\r\n" + +print_header $test_id + +# +# Check accounting config and bail if not found. +# +if { [test_account_storage] == 0 } { + log_warn "This test can't be run without a usable AccountStorageType" + exit 0 +} + +if { [string compare [check_accounting_admin_level] "Administrator"] } { + log_warn "This test can't be run without being an Accounting administrator.\n \ + Use: sacctmgr mod user \$USER set admin=admin." 
+ exit 0 +} + +proc sbatch { options regex } { + global number bin_sleep my_sbatch fedc1 fedc2 fedc3 + + set matches 0 + set job_id 0 + set capture 0 + set command "$my_sbatch -N1 --exclusive -t1 -a1-15 -o/dev/null " + append command $options + append command " --wrap \"sleep 20\"" + if {[string match $regex ""]} { + set capture 1 + set regex "Submitted batch job ($number).+" + } + spawn {*}$command + expect { + -re "$regex" { + incr matches + if {$capture} { + set job_id $expect_out(1,string) + } + } + timeout { + log_error "sbatch not responding" + end_it 1 + } + eof { + wait + } + } + if {$matches != 1} { + log_error "batch submit failure: expected $regex" + end_it 1 + } + + return $job_id +} + +proc squeue { regex } { + global my_squeue + + set matches 0 + set command "$my_squeue --noheader -a -Mfed1,fed2,fed3" + spawn {*}$command + expect { + -re "$regex" { + incr matches + } + eof { + wait + } + } + if {$matches != 1} { + log_error "unexpected error in squeue. expected $regex" + end_it 1 + } +} + +proc cancel_all_jobs { } { + global scancel user_name fedc1 fedc2 fedc3 + + spawn $scancel -M$fedc1,$fedc2,$fedc3 --user $user_name + expect { + eof { + wait + } + } + sleep 5 +} + +proc cleanup { } { + global scancel fed_name user_name bin_rm file_in fedc1 fedc2 fedc3 + global test_id bin_bash my_sacctmgr + + cancel_all_jobs + exec $bin_bash -c "$bin_rm -f test$test_id*.out" + + return [delete_federations $fed_name] +} + +proc end_it { exit_code } { + global test_id my_squeue + cleanup + if {$exit_code == 0} { + print_success $test_id + } + exit $exit_code +} + +#start test + +if {[test_federation_setup]} { + log_warn "WARNING: This test can't be run without fed_slurm_base,\ + fedc1, fedc2, fedc3 setup in globals.local." 
+ exit 0 +} + +if {[test_cluster_up $fedc1] || + [test_cluster_up $fedc2] || + [test_cluster_up $fedc3]} { + end_it 1 +} + +set user_name [get_my_user_name] + +# Remove existing setup +if {[cleanup] != 0} { + log_error "failed to cleanup" + end_it 1 +} + +# add clusters to federation +if {[setup_federation $fed_name]} { + log_error "failed to setup federation" + end_it 1 +} + +send_user "\n################################################################\n" +send_user "Setup cluster features" +send_user "\n################################################################\n" + +set matches 0 +set my_pid [spawn $sacctmgr -i modify cluster $fedc1 set features=fa] +expect { + -re "Setting$eol" { + incr matches + exp_continue + } + -re "^\\s+Feature\\s+=\\s+fa" { + incr matches + exp_continue + } + -re "Modified cluster...$eol" { + incr matches + exp_continue + } + -re "^\\s+$fedc1$eol" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr mod not responding\n" + slow_kill $my_pid + set exit_code 1 + } + eof { + wait + } +} +if {$exit_code || $matches != 4} { + send_user "$matches FAILURE: unexpected error.\n" + end_it 1 +} + +set matches 0 +set my_pid [spawn $sacctmgr -i modify cluster $fedc2 set features=fb] +expect { + -re "Setting$eol" { + incr matches + exp_continue + } + -re "^\\s+Feature\\s+=\\s+fb" { + incr matches + exp_continue + } + -re "Modified cluster...$eol" { + incr matches + exp_continue + } + -re "^\\s+$fedc2$eol" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr mod not responding\n" + slow_kill $my_pid + set exit_code 1 + } + eof { + wait + } +} +if {$exit_code || $matches != 4} { + send_user "$matches FAILURE: unexpected error.\n" + end_it 1 +} + +set matches 0 +set my_pid [spawn $sacctmgr -i modify cluster $fedc3 set features=fc] +expect { + -re "Setting$eol" { + incr matches + exp_continue + } + -re "^\\s+Feature\\s+=\\s+fc" { + incr matches + exp_continue + } + -re "Modified cluster...$eol" 
{ + incr matches + exp_continue + } + -re "^\\s+$fedc3$eol" { + incr matches + exp_continue + } + timeout { + send_user "\nFAILURE: sacctmgr mod not responding\n" + slow_kill $my_pid + set exit_code 1 + } + eof { + wait + } +} +if {$exit_code || $matches != 4} { + send_user "$matches FAILURE: unexpected error.\n" + end_it 1 +} + +send_user "\n################################################################\n" +send_user "Test federated arrays" +send_user "\n################################################################\n" + +cancel_all_jobs + +set jid(0) [sbatch "" ""] + +set jt "$jid(0)_1" + +wait_for_fed_job $jt RUNNING $fedc1,$fedc2,$fedc2 + +set r0 "(\\s+($jid(0))_(\[1-9\]|10).+$eol)" +set r1 "(\\s+($jid(0))_\\\[11-15\\\].+$eol)" + +set regf1 "CLUSTER: $fedc1$eol$r1$r0{10}" +set regf2 "CLUSTER: $fedc2$eol\\s*$eol" +set regf3 "CLUSTER: $fedc3$eol\\s*" + +squeue "$regf1$regf2$regf3" + +set jid(1) [sbatch "--cluster-constraint=fa" ""] + +set r2 "(\\s+($jid(1))_\\\[1-15\\\].+$eol)" + +set regf1 "CLUSTER: $fedc1$eol.*($r1|$r2){2}$r0{10}" +set regf2 "CLUSTER: $fedc2$eol\\s*$eol" +set regf3 "CLUSTER: $fedc3$eol\\s*" + +squeue "$regf1$regf2$regf3" + +set jt "$jid(0)_15" + +wait_for_fed_job $jt RUNNING $fedc1,$fedc2,$fedc2 + +sbatch "--cluster-constraint=fb" "sbatch: error: federated job arrays must run on local cluster${eol}sbatch: error: Batch job submission failed: No eligible clusters for federated job${eol}" +set jid(2) [sbatch "--cluster-constraint=fb -Mfed2" ""] + +set jt "$jid(2)_1" + +wait_for_fed_job $jt RUNNING $fedc1,$fedc2,$fedc2 + +set r0 "(\\s+($jid(0))_1\[1-5\].+$eol)" +set r1 "(\\s+($jid(1))_\[1-5\].+$eol)" +set r2 "(\\s+($jid(1))_\\\[6-15\\\].+$eol)" +set r3 "(\\s+($jid(2))_(\[1-9\]|10).+$eol)" +set r4 "(\\s+($jid(2))_\\\[11-15\\\].+$eol)" + +set regf1 "CLUSTER: $fedc1$eol$r2.*($r0|$r1){10}" +set regf2 "CLUSTER: $fedc2$eol\\s*$r4$r3{10}" +set regf3 "CLUSTER: $fedc3$eol\\s*" + +squeue "$regf1$regf2$regf3" + +sbatch "--cluster-constraint=fa -Mfed2" 
"sbatch: error: Batch.+" + +# All Done +end_it 0 diff --git a/testsuite/expect/test37.6 b/testsuite/expect/test37.6 index 7348ee8b04f2a13efebcc8e5112cf0d5f87fdd15..ae0e8ac790c5ccc09a8c516b732ebf80ac922850 100755 --- a/testsuite/expect/test37.6 +++ b/testsuite/expect/test37.6 @@ -347,10 +347,6 @@ if {$exit_code || $matches != 4} { end_it 1 } -send_user "\n################################################################\n" -send_user "Modify Feature of Running Job" -send_user "\n################################################################\n" -cancel_all_jobs set my_scontrol "${fed_slurm_base}/$fedc1/bin/scontrol" @@ -358,6 +354,12 @@ set job_id 0 set script "\"sleep 300\"" set my_sbatch "${fed_slurm_base}/$fedc1/bin/sbatch" + +send_user "\n################################################################\n" +send_user "Modify Feature of Running Job" +send_user "\n################################################################\n" +cancel_all_jobs + #first job set job_id [sbatch $my_sbatch $script fa] @@ -380,7 +382,7 @@ verify $fedc1 $job_id "PD\\s+$fedc1\\s+$fedc1" "Job not running on designated cl modify $job_id invalid "Invalid cluster feature specification for job" "allowed invalid feature" send_user "\n################################################################\n" -send_user "Move pending job to fed2 with feature fb" +send_user "Move pending job to $fedc2 with feature fb" send_user "\n################################################################\n" modify $job_id fb "" "did not allow user to modify features of pending job" @@ -393,7 +395,7 @@ verify $fedc1 $job_id "RV\\s+$fedc2\\s+$fedc2" "Job not revoked" verify $fedc2 $job_id "R\\s+$fedc2\\s+$fedc2" "Job not running on designated cluster" send_user "\n################################################################\n" -send_user "New job to fa, then move job to fed2,fed3. Runs on fed3" +send_user "New job to fa, then move job to $fedc2,$fedc3. 
Runs on $fedc3" send_user "\n################################################################\n" #third job @@ -410,7 +412,7 @@ verify $fedc1 $job_id "RV\\s+$fedc2,$fedc3\\s+$fedc3" "Job not revoked" verify $fedc3 $job_id "R\\s+$fedc2,$fedc3\\s+$fedc3" "Job not running on designated cluster" send_user "\n################################################################\n" -send_user "New job to fa, then move job to fed3" +send_user "New job to fa, then move job to $fedc3" send_user "\n################################################################\n" #fourth job @@ -422,7 +424,7 @@ verify $fedc1 $job_id "RV\\s+$fedc3\\s+$fedc3" "Job not revoked" verify $fedc3 $job_id "PD\\s+$fedc3\\s+NA" "Job not running on designated cluster" send_user "\n################################################################\n" -send_user "Move job to fed1,fed3" +send_user "Move job to $fedc1,$fedc3" send_user "\n################################################################\n" modify $job_id "fa,fc" "" "did not allow user to modify features of pending job" @@ -431,7 +433,7 @@ verify $fedc2 $job_id ".+Invalid.+" "Job found on wrong cluster" verify $fedc3 $job_id "PD\\s+$fedc1,$fedc3\\s+NA" "Job found on wrong cluster" send_user "\n################################################################\n" -send_user "Move job to fed1" +send_user "Move job to $fedc1" send_user "\n################################################################\n" modify $job_id "fa" "" "did not allow user to modify features of pending job" @@ -440,7 +442,7 @@ verify $fedc2 $job_id ".+Invalid.+" "Job found on wrong cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" send_user "\n################################################################\n" -send_user "Clear features. Should go to pending on fed1,fed2,fed3" +send_user "Clear features. 
Should go to pending on $fedc1,$fedc2,$fedc3" send_user "\n################################################################\n" #Clear Features @@ -512,21 +514,21 @@ sbatch $my_sbatch $script fc set job_id [sbatch $my_sbatch $script "" --requeue] -verify $fedc1 $job_id "PD\\s+fed1,fed2,fed3\\s+fed1,fed2,fed3" "Job not running on designated cluster" -verify $fedc2 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" -verify $fedc3 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" modify $job_id "fa" "" "did not allow user to modify features of pending job" -verify $fedc1 $job_id "PD\\s+fed1\\s+fed1" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" verify $fedc2 $job_id ".+Invalid.+" "Job found on wrong cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" modify $job_id "" "" "did not allow user to modify features of pending job" -verify $fedc1 $job_id "PD\\s+fed1,fed2,fed3\\s+fed1,fed2,fed3" "Job not running on designated cluster" -verify $fedc2 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" -verify $fedc3 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" send_user "\n################################################################\n" send_user 
"Modify Federation Cluster Features" @@ -539,59 +541,59 @@ sbatch $my_sbatch $script fc set job_id [sbatch $my_sbatch $script ""] -verify $fedc1 $job_id "PD\\s+fed1,fed2,fed3\\s+fed1,fed2,fed3" "Job not running on designated cluster" -verify $fedc2 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" -verify $fedc3 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" -modify_with_clusters $job_id "fed1" "" "did not allow user to modify features of pending job" +modify_with_clusters $job_id "$fedc1" "" "did not allow user to modify features of pending job" -verify $fedc1 $job_id "PD\\s+fed1\\s+fed1" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" verify $fedc2 $job_id ".+Invalid.+" "Job found on wrong cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" -modify_with_clusters $job_id "fed1,fed2" "" "did not allow user to modify features of pending job" +modify_with_clusters $job_id "$fedc1,$fedc2" "" "did not allow user to modify features of pending job" -verify $fedc1 $job_id "PD\\s+fed1,fed2\\s+fed1,fed2" "Job not running on designated cluster" -verify $fedc2 $job_id "PD\\s+fed1,fed2\\s+NA" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1,$fedc2\\s+$fedc1,$fedc2" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc1,$fedc2\\s+NA" "Job not running on designated cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" -modify_with_clusters $job_id "fed2" "" "did not allow user to modify features of pending job" +modify_with_clusters $job_id 
"$fedc2" "" "did not allow user to modify features of pending job" -verify $fedc1 $job_id "RV\\s+fed2\\s+fed2" "Job not running on designated cluster" -verify $fedc2 $job_id "PD\\s+fed2\\s+NA" "Job not running on designated cluster" +verify $fedc1 $job_id "RV\\s+$fedc2\\s+$fedc2" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc2\\s+NA" "Job not running on designated cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" modify_with_clusters $job_id "" "" "did not allow user to modify features of pending job" -verify $fedc1 $job_id "PD\\s+fed1,fed2,fed3\\s+fed1,fed2,fed3" "Job not running on designated cluster" -verify $fedc2 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" -verify $fedc3 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" -#this one shows that the fed1 is routing the update from fed1 to fed2 +#this one shows that the $fedc1 is routing the update from $fedc1 to $fedc2 send_user "\n################################################################\n" -send_user "fed1 routing the update from fed1 to fed2" +send_user "$fedc1 routing the update from $fedc1 to $fedc2" send_user "\n################################################################\n" -set job_id [sbatch $my_sbatch $script "" -Mfed2] +set job_id [sbatch $my_sbatch $script "" -M$fedc2] -verify $fedc2 $job_id "PD\\s+fed2\\s+fed2" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc2\\s+$fedc2" "Job not running on designated cluster" verify $fedc1 $job_id ".+Invalid.+" "Job found on wrong cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" 
-modify_with_clusters $job_id "fed1" "" "did not allow user to modify features of pending job" +modify_with_clusters $job_id "$fedc1" "" "did not allow user to modify features of pending job" -verify $fedc2 $job_id "RV\\s+fed1\\s+fed1" "Job not running on designated cluster" -verify $fedc1 $job_id "PD\\s+fed1\\s+NA" "Job not running on designated cluster" +verify $fedc2 $job_id "RV\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1\\s+NA" "Job not running on designated cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" modify_with_clusters $job_id "" "" "did not allow user to modify features of pending job" -verify $fedc2 $job_id "PD\\s+fed1,fed2,fed3\\s+fed1,fed2,fed3" "Job not running on designated cluster" -verify $fedc1 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" -verify $fedc3 $job_id "PD\\s+fed1,fed2,fed3\\s+NA" "Job not running on designated cluster" +verify $fedc2 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $job_id "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" -#Federations and clusterfeatures (Fed1 and fb - so the job is not eligible +#Federations and clusterfeatures ($fedc1 and fb - so the job is not eligible # anywhere beacause it can't have both) send_user "\n################################################################\n" @@ -599,10 +601,10 @@ send_user "Federations and ClusterFeatures" send_user "\n################################################################\n" -modify_with_clusters $job_id "fed1" "" "did not allow user to modify features of pending job" +modify_with_clusters $job_id "$fedc1" "" "did not allow user to modify features of pending job" -verify $fedc2 $job_id "RV\\s+fed1\\s+fed1" "Job not running on designated cluster" -verify $fedc1 
$job_id "PD\\s+fed1\\s+NA" "Job not running on designated cluster" +verify $fedc2 $job_id "RV\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1\\s+NA" "Job not running on designated cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" modify $job_id "fb" "" "did not allow user to modify features of pending job" @@ -613,10 +615,126 @@ verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" modify $job_id "fa" "" "did not allow user to modify features of pending job" -verify $fedc2 $job_id "RV\\s+fed1\\s+fed1" "Job not running on designated cluster" -verify $fedc1 $job_id "PD\\s+fed1\\s+NA" "Job not running on designated cluster" +verify $fedc2 $job_id "RV\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" +verify $fedc1 $job_id "PD\\s+$fedc1\\s+NA" "Job not running on designated cluster" verify $fedc3 $job_id ".+Invalid.+" "Job found on wrong cluster" +send_user "\n################################################################\n" +send_user "Federations and Negative ClusterFeatures" +send_user "\n################################################################\n" + +cancel_all_jobs + +set jid(0) [sbatch $my_sbatch $script !fc,fb] +set jid(1) [sbatch $my_sbatch $script !fa,fc] +set jid(2) [sbatch $my_sbatch $script !fb,fa] + +verify $fedc1 $jid(0) "R\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" +verify $fedc2 $jid(1) "R\\s+$fedc2\\s+$fedc2" "Job not running on designated cluster" +verify $fedc3 $jid(2) "R\\s+$fedc3\\s+$fedc3" "Job not running on designated cluster" + +#submit pending job on all clusters then move around using negative constraints. 
+ +set jid(4) [sbatch $my_sbatch $script ""] + +verify $fedc1 $jid(4) "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc2 $jid(4) "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $jid(4) "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" + +modify $jid(4) "!fb" "" "did not allow user to modify features of pending job" + +verify $fedc2 $jid(4) ".+Invalid.+" "Job found on wrong cluster" +verify $fedc1 $jid(4) "PD\\s+$fedc1,$fedc3\\s+" "Job not running on designated cluster" +verify $fedc3 $jid(4) "PD\\s+$fedc1,$fedc3\\s+" "Job not running on designated cluster" + +modify $jid(4) "!fc" "" "did not allow user to modify features of pending job" + +verify $fedc2 $jid(4) "PD\\s+$fedc1,$fedc2\\s+" "Job not running on designated cluster" +verify $fedc1 $jid(4) "PD\\s+$fedc1,$fedc2\\s+" "Job not running on designated cluster" +verify $fedc3 $jid(4) ".+Invalid.+" "Job found on wrong cluster" + +modify $jid(4) "!fa" "" "did not allow user to modify features of pending job" + +verify $fedc1 $jid(4) "RV\\s+$fedc2,$fedc3\\s+" "Job found on wrong cluster" +verify $fedc2 $jid(4) "PD\\s+$fedc2,$fedc3\\s+" "Job not running on designated cluster" +verify $fedc3 $jid(4) "PD\\s+$fedc2,$fedc3\\s+" "Job not running on designated cluster" + +modify $jid(4) "!fb,fc" "" "did not allow user to modify features of pending job" + +verify $fedc2 $jid(4) ".+Invalid.+" "Job found on wrong cluster" +verify $fedc1 $jid(4) "PD\\s+$fedc1\\s+" "Job not running on designated cluster" +verify $fedc3 $jid(4) ".+Invalid.+" "Job found on wrong cluster" + +modify $jid(4) "!fa,fc" "" "did not allow user to modify features of pending job" + +verify $fedc1 $jid(4) "RV\\s+$fedc2\\s+$fedc2" "Job found on wrong cluster" +verify $fedc2 $jid(4) "PD\\s+$fedc2\\s+" "Job not running on designated cluster" +verify $fedc3 $jid(4) ".+Invalid.+" "Job found on wrong cluster" + +modify $jid(4) "!fb,fa" "" 
"did not allow user to modify features of pending job" + +verify $fedc2 $jid(4) ".+Invalid.+" "Job found on wrong cluster" +verify $fedc3 $jid(4) "PD\\s+$fedc3\\s+" "Job not running on designated cluster" +verify $fedc1 $jid(4) "RV\\s+$fedc3\\s+$fedc3" "Job found on wrong cluster" + +#clear features again + +modify $jid(4) "" "" "did not allow user to modify features of pending job" + +verify $fedc1 $jid(4) "PD\\s+$fedc1,$fedc2,$fedc3\\s+$fedc1,$fedc2,$fedc3" "Job not running on designated cluster" +verify $fedc2 $jid(4) "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc3 $jid(4) "PD\\s+$fedc1,$fedc2,$fedc3\\s+NA" "Job not running on designated cluster" + +#submit pending jobs to single cluster using negative constraints. + +set jid(0) [sbatch $my_sbatch $script !fc,fb] + +wait_for_fed_job $jid(0) PENDING $fedc1 + +verify $fedc2 $jid(0) ".+Invalid.+" "Job found on wrong cluster" +verify $fedc1 $jid(0) "PD\\s+$fedc1\\s+$fedc1" "Job not running on designated cluster" +verify $fedc3 $jid(0) ".+Invalid.+" "Job found on wrong cluster" + +set jid(0) [sbatch $my_sbatch $script !fa,fb] + +sleep 5 + +verify $fedc2 $jid(0) ".+Invalid.+" "Job found on wrong cluster" +verify $fedc3 $jid(0) "PD\\s+$fedc3\\s+NA" "Job not running on designated cluster" +verify $fedc1 $jid(0) "PD\\s+$fedc3\\s+$fedc3" "Job found on wrong cluster" + +set jid(0) [sbatch $my_sbatch $script !fc,fa] + +sleep 5 + +verify $fedc1 $jid(0) "PD\\s+$fedc2\\s+$fedc2" "Job found on wrong cluster" +verify $fedc2 $jid(0) "PD\\s+$fedc2\\s+NA" "Job not running on designated cluster" +verify $fedc3 $jid(0) ".+Invalid.+" "Job found on wrong cluster" + +#error test--negate all clusters. 
+ +set matches 0 + +set command "$my_sbatch -N$node_count --exclusive --output=/dev/null \ + --error=/dev/null -t5 --wrap $script --cluster-constraint=!fa,fb,fc" +spawn {*}$command +expect { + -re ".+Invalid cluster feature specification" { + incr matches + } + timeout { + log_error "sbatch not responding" + end_it 1 + } + eof { + wait + } +} + +if {$matches != 1} { + log_error "batch submit failure" + end_it 1 +} # All Done end_it 0 diff --git a/testsuite/expect/test37.7 b/testsuite/expect/test37.7 index cf5dbf728c89bf2c70b23adc1b28de97f1a6bbab..64ff3b419e15bbab5b08d495f4b06e0f819ede3e 100755 --- a/testsuite/expect/test37.7 +++ b/testsuite/expect/test37.7 @@ -57,7 +57,7 @@ set my_sbatch "${fed_slurm_base}/$fedc1/bin/sbatch" set my_srun "${fed_slurm_base}/$fedc1/bin/srun" set my_salloc "${fed_slurm_base}/$fedc1/bin/salloc" set my_squeue "${fed_slurm_base}/$fedc1/bin/squeue" -set min_job_age [expr {[get_min_job_age] + 65}] +set drain_time 10 set eol "\r\n" @@ -403,15 +403,8 @@ sleep 10 squeue "" -log_info "Sleep $min_job_age seconds until system is drained" -sleep $min_job_age - -scontrol jobs "No jobs in the system" - -scontrol fed ".+DRAIN.+ACTIVE.+" - -log_info "Sleep 35 more seconds until system is drained" -sleep 35 +log_info "Sleep $drain_time seconds until system is drained" +sleep $drain_time scontrol fed ".+DRAINED.+ACTIVE.+" @@ -462,17 +455,28 @@ sbatch 1 1 squeue "" -scontrol jobs "JobId.+" - -log_info "Sleep $min_job_age seconds until system is drained" -sleep $min_job_age - -scontrol jobs "No jobs in the system" +log_info "Sleep $drain_time seconds until system is removed" +sleep $drain_time scontrol fed "" sacctmgr_show "\\s+ACTIVE\\s+" +# Once the system is drained, the controller will mark all jobs as non-requeueable. +scontrol jobs "Requeue=0.+" + + +#Make sure we can submit to removed cluster. 
+ +set job_id [sbatch 1 0] + +if {[expr $job_id >> 26]} { + send_user "Removed cluster still giving federated job id\n" + end_it 1 +} + +wait_for_fed_job $job_id DONE $fedc1 + set matches 0 spawn $my_sacctmgr mod fed $fed_name set clusters+=$fedc1 -i