From 5ed0d6bbcbb8b123f4aa690b95be6bd6dcfddcba Mon Sep 17 00:00:00 2001 From: Christian Zimmermann Date: Mon, 15 Apr 2024 01:23:42 +0200 Subject: [PATCH] WIP: rarray load --- src/opt/mpi/include/rarray.cc.h | 139 +++++++++++++++++++++++--------- src/opt/mpi/include/rarray.h | 26 +++++- 2 files changed, 124 insertions(+), 41 deletions(-) diff --git a/src/opt/mpi/include/rarray.cc.h b/src/opt/mpi/include/rarray.cc.h index ba5938f..6585eb9 100644 --- a/src/opt/mpi/include/rarray.cc.h +++ b/src/opt/mpi/include/rarray.cc.h @@ -146,7 +146,7 @@ namespace CNORXZ auto li = iter<1,sizeof...(Indices)> ( [&](auto i) { return pack[CSizeT{}]; }, [](const auto&... x) { return mindexPtr( (x * ...) ); } ); - return roproot(*this, ri, li); + return croproot(*this, ri, li); } else { return (*mA)(pack); @@ -220,46 +220,111 @@ namespace CNORXZ return mGeom; } - SizeT getRankedSize(const RangePtr& r, const RangePtr& x) - { - SizeT rsize = 1; - for(SizeT mu = 0; mu != r->dim(); ++mu){ - const RangePtr s = r->sub(mu); - const RangePtr y = x->sub(mu); - if(s->size() > 1){ - rsize *= getRankedSize(s,y); - } - else { - - } - } - return rsize; - } - template template - void RCArray::load(const Sptr& i1, const Sptr& i2) const + void RCArray::load(const Sptr& lpi, const Sptr& ai, + const Sptr>& imap) const { - VCHECK(i1->lex()); - VCHECK(i2->lex()); - /* - const SizeT rsize = getRankedSize(mGeom); - if(mMap.size() != rsize){ - mMap.resize(rsize); + // TODO: blocks!!! (NOTE(review): 'blocks' is used below but not declared in this function) + const SizeT myrank = getRankNumber(); + const SizeT Nranks = getNumRanks(); + + const SizeT mapsize = ai->range()->size(); + mMap = Vector(mapsize,nullptr); + Vector> sendbuf(Nranks); + for(auto& sb: sendbuf){ + sb.reserve(mData.size()); } - const SizeT block = ; // size of un-ranked range - Vector sendbuf; - SizeT sendc = 0; - SizeT recvc = 0; - // make src-tar-map!!! 
- i1->ifor( operation( [](const SizeT ptar, const SizeT psrc) { - const SizeT sendr = psrc/mA.size(); - const SizeT recvr = ptar/mA.size(); - if(sendr == getRankNumber()) { } - if(recvr == getRankNumber()) { } - }, pos(i1), pos(i2) ) ); - // MPI_Sendrecv()!!! - */ + Vector> request(Nranks); + const SizeT locsz = lpi->local()->lmax().val(); + + // First loop: collect the positions (p % locsz) to request from each remote rank + lpi->ifor( mapXpr(ai, lpi, imap, + operation + ( [&](SizeT p, SizeT q) { + const SizeT r = p / locsz; + if(myrank != r){ + request[r].push_back(p % locsz); + } + } , posop(ai), posop(lpi) ) ) , + NoF {} )(); + + // transfer: + Vector reqsizes(Nranks); + SizeT bufsize = 0; + Vector> ext(Nranks); + for(auto& e: ext){ + e.resize(Nranks); + } + for(SizeT i = 0; i != Nranks; ++i){ + reqsizes[i] = request[i].size(); + bufsize += reqsizes[i]*blocks; + ext[myrank][i] = reqsizes[i]; + } + mBuf.resize(bufsize); + MPI_Status stat; + + // transfer requests: + for(SizeT o = 1; o != Nranks; ++o){ + const SizeT dstr = (myrank + o) % Nranks; + const SizeT srcr = (myrank - o + Nranks) % Nranks; + SizeT sendsize = 0; + MPI_Sendrecv(reqsizes.data()+dstr, 1, MPI_UNSIGNED_LONG, dstr, 0, + &sendsize, 1, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat); + ext[srcr][myrank] = sendsize; + Vector sendpos(sendsize); + MPI_Sendrecv(request[dstr].data(), reqsizes[dstr], MPI_UNSIGNED_LONG, dstr, 0, + sendpos.data(), sendsize, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat); + sendbuf[srcr].resize(sendsize*blocks); + for(SizeT i = 0; i != sendsize; ++i){ + std::memcpy( sendbuf[srcr].data()+i*blocks, mData.data()+sendpos[i]*blocks, blocks*sizeof(T) ); + } + } + + const MPI_Datatype dt = Typemap::value(); + + // transfer data: + for(SizeT o = 1; o != Nranks; ++o){ + const SizeT dstr = (myrank + o) % Nranks; + const SizeT srcr = (myrank - o + Nranks) % Nranks; + SizeT off = 0; + for(SizeT p = 0; p != srcr; ++p){ + off += ext[myrank][p]; + } + + MPI_Sendrecv(sendbuf[dstr].data(), ext[dstr][myrank]*blocks, dt, dstr, 0, 
mBuf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0, + MPI_COMM_WORLD, &stat); + + } + + // Second loop: Assign map to target buffer positions: + Vector cnt(Nranks); + lpi->ifor( mapXpr(ai, lpi, imap, + operation + ( [&](SizeT p, SizeT q) { + const SizeT r = p / locsz; + if(myrank != r){ + SizeT off = 0; + for(SizeT s = 0; s != r; ++s){ + off += ext[myrank][s]; + } + mMap[p] = mBuf.data() + off*blocks + cnt[r]*blocks; + ++cnt[r]; + } + mMap[q + myrank*locsz] = mData.data() + q*blocks; + } , posop(ai), posop(lpi) ) ), NoF {} )(); + + } + + template + template + Sptr> RCArray::load(const Sptr& i, const F& f) const + { + Sptr> imap = std::make_shared>(); + + load(i, /* TODO: access index argument (ai) still missing */, imap); + return imap; } } // namespace mpi diff --git a/src/opt/mpi/include/rarray.h b/src/opt/mpi/include/rarray.h index 9b27696..9322a1a 100644 --- a/src/opt/mpi/include/rarray.h +++ b/src/opt/mpi/include/rarray.h @@ -110,13 +110,31 @@ namespace CNORXZ /** Get rank geometry. */ RangePtr geom() const; + /** Get the buffer map. + buffermap()[r (ranked)][i] returns the same as + loc[r * i] if 'loc' is a purely local array with the same content. + The buffer must be initialized beforehand according to the required range. + */ + const Vector buffermap() const; + /** Load all data from other ranks that is accessed by i2 in a loop over i1. - @param i1 Loop index. - @param i2 Access index. + imap indicates the position of ai for a given position of lpi. + @param lpi Loop index. + @param ai Access index. + @param imap Index position map. */ template - void load(const Sptr& i1, const Sptr& i2) const; - + void load(const Sptr& lpi, const Sptr& ai, + const Sptr>& imap) const; + + /** Load all data from other ranks that is accessed by f(i). + @param lpi Loop index. + @param f Map function. + @return Index position map. + */ + template + Sptr> load(const Sptr& lpi, const F& f) const; + private: ObjHandle> mA; RangePtr mGeom;