diff --git a/src/opt/mpi/include/rarray.cc.h b/src/opt/mpi/include/rarray.cc.h index 8d70f19..9d67dee 100644 --- a/src/opt/mpi/include/rarray.cc.h +++ b/src/opt/mpi/include/rarray.cc.h @@ -225,31 +225,48 @@ namespace CNORXZ void RCArray::load(const Sptr& lpi, const Sptr& ai, const Sptr>& imap) const { - // TODO: use setupBuffer from the test!!! // TODO: blocks!!! const SizeT blocks = 0; assert(0); // TODO!!! + setupBuffer(ai, lpi, imap, *mA, mBuf, mMap, blocks); + } + + template + template + Sptr> RCArray::load(const Sptr& i, const F& f) const + { + Sptr> imap = std::make_shared>(); + + //load(i, /**/, imap); + return imap; + } + + template + void setupBuffer(const Sptr>& rgj, const Sptr>& rgi, + const Sptr>& imap, const CArrayBase& data, + Vector& buf, Vector& map, const SizeT blocks) + { const SizeT myrank = getRankNumber(); const SizeT Nranks = getNumRanks(); - const SizeT mapsize = ai->range()->size(); - mMap = Vector(mapsize,nullptr); + const SizeT mapsize = rgj->range()->size(); + map = Vector(mapsize,nullptr); Vector> sendbuf(Nranks); for(auto& sb: sendbuf){ - sb.reserve(mA->size()); + sb.reserve(data.size()); } Vector> request(Nranks); - const SizeT locsz = lpi->local()->lmax().val(); + const SizeT locsz = rgi->local()->lmax().val(); // First loop: setup send buffer - lpi->ifor( mapXpr(ai, lpi, imap, + rgi->ifor( mapXpr(rgj, rgi, imap, operation ( [&](SizeT p, SizeT q) { const SizeT r = p / locsz; if(myrank != r){ request[r].push_back(p % locsz); } - } , posop(ai), posop(lpi) ) ) , + } , posop(rgj), posop(rgi) ) ) , NoF {} )(); // transfer: @@ -264,7 +281,7 @@ namespace CNORXZ bufsize += reqsizes[i]*blocks; ext[myrank][i] = reqsizes[i]; } - mBuf.resize(bufsize); + buf.resize(bufsize); MPI_Status stat; // transfer requests: @@ -280,7 +297,7 @@ namespace CNORXZ sendpos.data(), sendsize, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat); sendbuf[srcr].resize(sendsize*blocks); for(SizeT i = 0; i != sendsize; ++i){ - std::memcpy( sendbuf[srcr].data()+i*blocks, mA->data()+sendpos[i]*blocks, blocks*sizeof(T) ); + std::memcpy( sendbuf[srcr].data()+i*blocks, data.data()+sendpos[i]*blocks, blocks*sizeof(T) ); } } @@ -296,14 +313,14 @@ namespace CNORXZ } MPI_Sendrecv(sendbuf[dstr].data(), ext[dstr][myrank]*blocks, dt, dstr, 0, - mBuf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0, + buf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0, MPI_COMM_WORLD, &stat); } // Second loop: Assign map to target buffer positions: Vector cnt(Nranks); - lpi->ifor( mapXpr(ai, lpi, imap, + rgi->ifor( mapXpr(rgj, rgi, imap, operation ( [&](SizeT p, SizeT q) { const SizeT r = p / locsz; @@ -312,23 +329,13 @@ namespace CNORXZ for(SizeT s = 0; s != r; ++s){ off += ext[myrank][s]; } - mMap[p] = mBuf.data() + off*blocks + cnt[r]*blocks; + map[p] = buf.data() + off*blocks + cnt[r]*blocks; ++cnt[r]; } - mMap[q + myrank*locsz] = mA->data() + q*blocks; - } , posop(ai), posop(lpi) ) ), NoF {} )(); - + map[q + myrank*locsz] = data.data() + q*blocks; + } , posop(rgj), posop(rgi) ) ), NoF {} )(); } - template - template - Sptr> RCArray::load(const Sptr& i, const F& f) const - { - Sptr> imap = std::make_shared>(); - - //load(i, /**/, imap); - return imap; - } } // namespace mpi } // namespace CNORXZ diff --git a/src/opt/mpi/include/rarray.h b/src/opt/mpi/include/rarray.h index 9322a1a..3766e0e 100644 --- a/src/opt/mpi/include/rarray.h +++ b/src/opt/mpi/include/rarray.h @@ -167,6 +167,13 @@ namespace CNORXZ }; + + template + void setupBuffer(const Sptr>& rgj, const Sptr>& rgi, + const Sptr>& imap, const CArrayBase& data, + Vector& buf, Vector& map, const SizeT blocks); + + } // namespace mpi } // namespace CNORXZ diff --git a/src/opt/mpi/tests/setbuf_unit_test.cc b/src/opt/mpi/tests/setbuf_unit_test.cc index ccccfc1..3eafab1 100644 --- a/src/opt/mpi/tests/setbuf_unit_test.cc +++ b/src/opt/mpi/tests/setbuf_unit_test.cc @@ -64,105 +64,10 @@ namespace }; - template - void setupBuffer(const Sptr& rgj, const Sptr& rgi, - const Sptr>& fmap, const Vector& data, - Vector& buf, Vector& map, const SizeT blocks) - { - const SizeT myrank = getRankNumber(); - const SizeT Nranks = getNumRanks(); - - const SizeT mapsize = rgj->range()->size(); - map = Vector(mapsize,nullptr); - Vector> sendbuf(Nranks); - for(auto& sb: sendbuf){ - sb.reserve(data.size()); - } - Vector> request(Nranks); - const SizeT locsz = rgi->local()->lmax().val(); - - // First loop: setup send buffer - rgi->ifor( mapXpr(rgj, rgi, fmap, - operation - ( [&](SizeT p, SizeT q) { - const SizeT r = p / locsz; - if(myrank != r){ - request[r].push_back(p % locsz); - } - } , posop(rgj), posop(rgi) ) ) , - NoF {} )(); - - // transfer: - Vector reqsizes(Nranks); - SizeT bufsize = 0; - Vector> ext(Nranks); - for(auto& e: ext){ - e.resize(Nranks); - } - for(SizeT i = 0; i != Nranks; ++i){ - reqsizes[i] = request[i].size(); - bufsize += reqsizes[i]*blocks; - ext[myrank][i] = reqsizes[i]; - } - buf.resize(bufsize); - MPI_Status stat; - - // transfer requests: - for(SizeT o = 1; o != Nranks; ++o){ - const SizeT dstr = (myrank + o) % Nranks; - const SizeT srcr = (myrank - o + Nranks) % Nranks; - SizeT sendsize = 0; - MPI_Sendrecv(reqsizes.data()+dstr, 1, MPI_UNSIGNED_LONG, dstr, 0, - &sendsize, 1, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat); - ext[srcr][myrank] = sendsize; - Vector sendpos(sendsize); - MPI_Sendrecv(request[dstr].data(), reqsizes[dstr], MPI_UNSIGNED_LONG, dstr, 0, - sendpos.data(), sendsize, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat); - sendbuf[srcr].resize(sendsize*blocks); - for(SizeT i = 0; i != sendsize; ++i){ - std::memcpy( sendbuf[srcr].data()+i*blocks, data.data()+sendpos[i]*blocks, blocks*sizeof(T) ); - } - } - - const MPI_Datatype dt = Typemap::value(); - - // transfer data: - for(SizeT o = 1; o != Nranks; ++o){ - const SizeT dstr = (myrank + o) % Nranks; - const SizeT srcr = (myrank - o + Nranks) % Nranks; - SizeT off = 0; - for(SizeT p = 0; p != srcr; ++p){ - off += ext[myrank][p]; - } - - MPI_Sendrecv(sendbuf[dstr].data(), ext[dstr][myrank]*blocks, dt, dstr, 0, - buf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0, - MPI_COMM_WORLD, &stat); - - } - - // Second loop: Assign map to target buffer positions: - Vector cnt(Nranks); - rgi->ifor( mapXpr(rgj, rgi, fmap, - operation - ( [&](SizeT p, SizeT q) { - const SizeT r = p / locsz; - if(myrank != r){ - SizeT off = 0; - for(SizeT s = 0; s != r; ++s){ - off += ext[myrank][s]; - } - map[p] = buf.data() + off*blocks + cnt[r]*blocks; - ++cnt[r]; - } - map[q + myrank*locsz] = data.data() + q*blocks; - } , posop(rgj), posop(rgi) ) ), NoF {} )(); - } TEST_F(Setbuf_Test, run) { const SizeT myrank = getRankNumber(); - //const SizeT Nranks = getNumRanks(); typedef UIndex UI; typedef MIndex LocI; @@ -174,16 +79,15 @@ namespace LocI gj(mGRange); auto ri = std::make_shared(mGeom); constexpr auto C0 = CSizeT<0> {}; - //constexpr auto C1 = CSizeT<1> {}; constexpr auto C2 = CSizeT<2> {}; constexpr auto C3 = CSizeT<3> {}; const SizeT LSize = mRRange->sub(1)->size(); const SizeT blocks = mSRange->size(); - Vector data(LSize*blocks); + MArray data(mRRange->sub(1)*mSRange); for(SizeT i = 0; i != data.size(); ++i){ - data[i] = static_cast(LSize*myrank*blocks+i); + data.data()[i] = static_cast(LSize*myrank*blocks+i); } *rgj = 0;