From 4d4c8c17a01669d599ff9bdb971f6f68f62e04c7 Mon Sep 17 00:00:00 2001 From: Artem Polyakov <artpol84@gmail.com> Date: Wed, 11 Nov 2015 14:54:59 +0600 Subject: [PATCH] 1. Prepare to the new state machine 2. Fail with SIGKILL if state machine misbehaves. --- src/plugins/mpi/pmix/pmixp_coll.c | 25 ++++++++++++++++++++++--- src/plugins/mpi/pmix/pmixp_coll.h | 2 -- src/plugins/mpi/pmix/pmixp_server.c | 24 +++++++++++++++--------- 3 files changed, 37 insertions(+), 14 deletions(-) diff --git a/src/plugins/mpi/pmix/pmixp_coll.c b/src/plugins/mpi/pmix/pmixp_coll.c index 411ac66ee62..d31488473de 100644 --- a/src/plugins/mpi/pmix/pmixp_coll.c +++ b/src/plugins/mpi/pmix/pmixp_coll.c @@ -99,6 +99,26 @@ static int _pack_ranges(pmixp_coll_t *coll) return SLURM_SUCCESS; } +static void _fan_in_finished(pmixp_coll_t *coll) +{ + xassert(PMIXP_COLL_FAN_IN == coll->state); + coll->state = PMIXP_COLL_FAN_OUT; + memset(coll->ch_contribs, 0, sizeof(int) * coll->children_cnt); + coll->contrib_cntr = 0; + coll->contrib_local = 0; + set_buf_offset(coll->buf, coll->serv_offs); + if (SLURM_SUCCESS != _pack_ranges(coll)) { + PMIXP_ERROR("Cannot pack ranges to coll message header!"); + } +} + +static void _fan_out_finished(pmixp_coll_t *coll) +{ + xassert( PMIXP_COLL_FAN_OUT == coll->state /* || fan_out_in */); + coll->state = PMIXP_COLL_SYNC; + coll->seq++; /* move to the next collective */ +} + static void _reset_coll(pmixp_coll_t *coll) { switch (coll->state) { @@ -535,8 +555,7 @@ static void _progress_fan_in(pmixp_coll_t *coll) } /* transit to the next state */ - coll->state = PMIXP_COLL_FAN_OUT; - set_buf_offset(coll->buf, 0); + _fan_in_finished(coll); /* if we are root - push data to PMIx here. * Originally there was a homogenuous solution: root nodename was in the hostlist. @@ -577,7 +596,7 @@ void _progres_fan_out(pmixp_coll_t *coll, Buf buf) pmixp_free_Buf, (void *)buf); } /* Prepare for the next collective operation */ - _reset_coll(coll); + _fan_out_finished(coll); PMIXP_DEBUG("%s:%d: collective is prepared for the next use", pmixp_info_namespace(), pmixp_info_nodeid()); diff --git a/src/plugins/mpi/pmix/pmixp_coll.h b/src/plugins/mpi/pmix/pmixp_coll.h index 2c05834e850..248cf4faaec 100644 --- a/src/plugins/mpi/pmix/pmixp_coll.h +++ b/src/plugins/mpi/pmix/pmixp_coll.h @@ -170,8 +170,6 @@ static inline int pmixp_coll_check_seq(pmixp_coll_t *coll, uint32_t seq, * want to discard this message */ return SLURM_ERROR; } - PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d", seq, - nodename, coll->seq); /* maybe need more sophisticated handling in presence of * several steps. However maybe it's enough to just ignore */ /* slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(), SIGKILL); */ diff --git a/src/plugins/mpi/pmix/pmixp_server.c b/src/plugins/mpi/pmix/pmixp_server.c index 93bd06e9abd..c049d5b2b35 100644 --- a/src/plugins/mpi/pmix/pmixp_server.c +++ b/src/plugins/mpi/pmix/pmixp_server.c @@ -411,16 +411,22 @@ static void _process_server_request(recv_header_t *_hdr, void *payload) coll = pmixp_state_coll_get(type, procs, nprocs); xfree(procs); - PMIXP_DEBUG( - "FENCE collective message from node \"%s\", type = %s", - nodename, - (PMIXP_MSG_FAN_IN == hdr->type) ? - "fan-in" : "fan-out"); - - if (SLURM_SUCCESS - != pmixp_coll_check_seq(coll, hdr->seq, + PMIXP_DEBUG("FENCE collective message from node \"%s\", type = %s", + nodename, + (PMIXP_MSG_FAN_IN == hdr->type) ? "fan-in" : "fan-out"); + + if (SLURM_SUCCESS != pmixp_coll_check_seq(coll, hdr->seq, nodename)) { - /* stop processing discardig this message */ + /* this is unexepable event: either something went + * really wrong or the state machine is incorrect. + * This will 100% lead to application hang. + */ + PMIXP_ERROR("Bad collective seq. #%d from %s, current is %d", + hdr->seq, nodename, coll->seq); + pmixp_debug_hang(0); /* enable hang to debug this! */ + slurm_kill_job_step(pmixp_info_jobid(), pmixp_info_stepid(), + SIGKILL); + break; } -- GitLab