mpi: rarray: use setupBuffer function
parent a037598775
commit 6e16927442
3 changed files with 40 additions and 122 deletions
@@ -225,31 +225,48 @@ namespace CNORXZ
     void RCArray<T>::load(const Sptr<Index1>& lpi, const Sptr<Index2>& ai,
                           const Sptr<Vector<SizeT>>& imap) const
     {
-        // TODO: use setupBuffer from the test!!!
         // TODO: blocks!!!
         const SizeT blocks = 0; assert(0); // TODO!!!
 
+        setupBuffer(ai, lpi, imap, *mA, mBuf, mMap, blocks);
+    }
+
+    template <typename T>
+    template <class Index, class F>
+    Sptr<Vector<SizeT>> RCArray<T>::load(const Sptr<Index>& i, const F& f) const
+    {
+        Sptr<Vector<SizeT>> imap = std::make_shared<Vector<SizeT>>();
+
+        //load(i, /**/, imap);
+        return imap;
+    }
+
+    template <class TarI, class RTarI, class SrcI, class RSrcI, typename T>
+    void setupBuffer(const Sptr<RIndex<TarI,RTarI>>& rgj, const Sptr<RIndex<SrcI,RSrcI>>& rgi,
+                     const Sptr<Vector<SizeT>>& imap, const CArrayBase<T>& data,
+                     Vector<T>& buf, Vector<const T*>& map, const SizeT blocks)
+    {
         const SizeT myrank = getRankNumber();
         const SizeT Nranks = getNumRanks();
 
-        const SizeT mapsize = ai->range()->size();
-        mMap = Vector<const T*>(mapsize,nullptr);
+        const SizeT mapsize = rgj->range()->size();
+        map = Vector<const T*>(mapsize,nullptr);
         Vector<Vector<T>> sendbuf(Nranks);
         for(auto& sb: sendbuf){
-            sb.reserve(mA->size());
+            sb.reserve(data.size());
         }
         Vector<Vector<SizeT>> request(Nranks);
-        const SizeT locsz = lpi->local()->lmax().val();
+        const SizeT locsz = rgi->local()->lmax().val();
 
         // First loop: setup send buffer
-        lpi->ifor( mapXpr(ai, lpi, imap,
+        rgi->ifor( mapXpr(rgj, rgi, imap,
                           operation
                           ( [&](SizeT p, SizeT q) {
                               const SizeT r = p / locsz;
                               if(myrank != r){
                                   request[r].push_back(p % locsz);
                               }
-                          } , posop(ai), posop(lpi) ) ) ,
+                          } , posop(rgj), posop(rgi) ) ) ,
                    NoF {} )();
 
         // transfer:
@@ -264,7 +281,7 @@ namespace CNORXZ
             bufsize += reqsizes[i]*blocks;
             ext[myrank][i] = reqsizes[i];
         }
-        mBuf.resize(bufsize);
+        buf.resize(bufsize);
         MPI_Status stat;
 
         // transfer requests:
@@ -280,7 +297,7 @@ namespace CNORXZ
                          sendpos.data(), sendsize, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat);
             sendbuf[srcr].resize(sendsize*blocks);
             for(SizeT i = 0; i != sendsize; ++i){
-                std::memcpy( sendbuf[srcr].data()+i*blocks, mA->data()+sendpos[i]*blocks, blocks*sizeof(T) );
+                std::memcpy( sendbuf[srcr].data()+i*blocks, data.data()+sendpos[i]*blocks, blocks*sizeof(T) );
             }
         }
 
@@ -296,14 +313,14 @@ namespace CNORXZ
             }
 
             MPI_Sendrecv(sendbuf[dstr].data(), ext[dstr][myrank]*blocks, dt, dstr, 0,
-                         mBuf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0,
+                         buf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0,
                          MPI_COMM_WORLD, &stat);
 
         }
 
         // Second loop: Assign map to target buffer positions:
         Vector<SizeT> cnt(Nranks);
-        lpi->ifor( mapXpr(ai, lpi, imap,
+        rgi->ifor( mapXpr(rgj, rgi, imap,
                           operation
                           ( [&](SizeT p, SizeT q) {
                               const SizeT r = p / locsz;
@@ -312,23 +329,13 @@ namespace CNORXZ
                                   for(SizeT s = 0; s != r; ++s){
                                       off += ext[myrank][s];
                                   }
-                                  mMap[p] = mBuf.data() + off*blocks + cnt[r]*blocks;
+                                  map[p] = buf.data() + off*blocks + cnt[r]*blocks;
                                   ++cnt[r];
                               }
-                              mMap[q + myrank*locsz] = mA->data() + q*blocks;
-                          } , posop(ai), posop(lpi) ) ), NoF {} )();
+                              map[q + myrank*locsz] = data.data() + q*blocks;
+                          } , posop(rgj), posop(rgi) ) ), NoF {} )();
 
     }
 
-    template <typename T>
-    template <class Index, class F>
-    Sptr<Vector<SizeT>> RCArray<T>::load(const Sptr<Index>& i, const F& f) const
-    {
-        Sptr<Vector<SizeT>> imap = std::make_shared<Vector<SizeT>>();
-
-        //load(i, /**/, imap);
-        return imap;
-    }
-
 } // namespace mpi
 } // namespace CNORXZ

@@ -167,6 +167,13 @@ namespace CNORXZ
 
 
     };
+
+    template <class TarI, class RTarI, class SrcI, class RSrcI, typename T>
+    void setupBuffer(const Sptr<RIndex<TarI,RTarI>>& rgj, const Sptr<RIndex<SrcI,RSrcI>>& rgi,
+                     const Sptr<Vector<SizeT>>& imap, const CArrayBase<T>& data,
+                     Vector<T>& buf, Vector<const T*>& map, const SizeT blocks);
+
+
 } // namespace mpi
 } // namespace CNORXZ

@@ -64,105 +64,10 @@ namespace
     };
 
 
-    template <class TarIndex, class SrcIndex, typename T>
-    void setupBuffer(const Sptr<TarIndex>& rgj, const Sptr<SrcIndex>& rgi,
-                     const Sptr<Vector<SizeT>>& fmap, const Vector<T>& data,
-                     Vector<T>& buf, Vector<const T*>& map, const SizeT blocks)
-    {
-        const SizeT myrank = getRankNumber();
-        const SizeT Nranks = getNumRanks();
-
-        const SizeT mapsize = rgj->range()->size();
-        map = Vector<const T*>(mapsize,nullptr);
-        Vector<Vector<T>> sendbuf(Nranks);
-        for(auto& sb: sendbuf){
-            sb.reserve(data.size());
-        }
-        Vector<Vector<SizeT>> request(Nranks);
-        const SizeT locsz = rgi->local()->lmax().val();
-
-        // First loop: setup send buffer
-        rgi->ifor( mapXpr(rgj, rgi, fmap,
-                          operation
-                          ( [&](SizeT p, SizeT q) {
-                              const SizeT r = p / locsz;
-                              if(myrank != r){
-                                  request[r].push_back(p % locsz);
-                              }
-                          } , posop(rgj), posop(rgi) ) ) ,
-                   NoF {} )();
-
-        // transfer:
-        Vector<SizeT> reqsizes(Nranks);
-        SizeT bufsize = 0;
-        Vector<Vector<SizeT>> ext(Nranks);
-        for(auto& e: ext){
-            e.resize(Nranks);
-        }
-        for(SizeT i = 0; i != Nranks; ++i){
-            reqsizes[i] = request[i].size();
-            bufsize += reqsizes[i]*blocks;
-            ext[myrank][i] = reqsizes[i];
-        }
-        buf.resize(bufsize);
-        MPI_Status stat;
-
-        // transfer requests:
-        for(SizeT o = 1; o != Nranks; ++o){
-            const SizeT dstr = (myrank + o) % Nranks;
-            const SizeT srcr = (myrank - o + Nranks) % Nranks;
-            SizeT sendsize = 0;
-            MPI_Sendrecv(reqsizes.data()+dstr, 1, MPI_UNSIGNED_LONG, dstr, 0,
-                         &sendsize, 1, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat);
-            ext[srcr][myrank] = sendsize;
-            Vector<SizeT> sendpos(sendsize);
-            MPI_Sendrecv(request[dstr].data(), reqsizes[dstr], MPI_UNSIGNED_LONG, dstr, 0,
-                         sendpos.data(), sendsize, MPI_UNSIGNED_LONG, srcr, 0, MPI_COMM_WORLD, &stat);
-            sendbuf[srcr].resize(sendsize*blocks);
-            for(SizeT i = 0; i != sendsize; ++i){
-                std::memcpy( sendbuf[srcr].data()+i*blocks, data.data()+sendpos[i]*blocks, blocks*sizeof(T) );
-            }
-        }
-
-        const MPI_Datatype dt = Typemap<T>::value();
-
-        // transfer data:
-        for(SizeT o = 1; o != Nranks; ++o){
-            const SizeT dstr = (myrank + o) % Nranks;
-            const SizeT srcr = (myrank - o + Nranks) % Nranks;
-            SizeT off = 0;
-            for(SizeT p = 0; p != srcr; ++p){
-                off += ext[myrank][p];
-            }
-
-            MPI_Sendrecv(sendbuf[dstr].data(), ext[dstr][myrank]*blocks, dt, dstr, 0,
-                         buf.data()+off*blocks, ext[myrank][srcr]*blocks, dt, srcr, 0,
-                         MPI_COMM_WORLD, &stat);
-
-        }
-
-        // Second loop: Assign map to target buffer positions:
-        Vector<SizeT> cnt(Nranks);
-        rgi->ifor( mapXpr(rgj, rgi, fmap,
-                          operation
-                          ( [&](SizeT p, SizeT q) {
-                              const SizeT r = p / locsz;
-                              if(myrank != r){
-                                  SizeT off = 0;
-                                  for(SizeT s = 0; s != r; ++s){
-                                      off += ext[myrank][s];
-                                  }
-                                  map[p] = buf.data() + off*blocks + cnt[r]*blocks;
-                                  ++cnt[r];
-                              }
-                              map[q + myrank*locsz] = data.data() + q*blocks;
-                          } , posop(rgj), posop(rgi) ) ), NoF {} )();
-    }
 
     TEST_F(Setbuf_Test, run)
     {
         const SizeT myrank = getRankNumber();
-        //const SizeT Nranks = getNumRanks();
 
         typedef UIndex<Int> UI;
         typedef MIndex<UI,UI,UI,UI> LocI;
@@ -174,16 +79,15 @@ namespace
         LocI gj(mGRange);
         auto ri = std::make_shared<RankI>(mGeom);
         constexpr auto C0 = CSizeT<0> {};
-        //constexpr auto C1 = CSizeT<1> {};
         constexpr auto C2 = CSizeT<2> {};
         constexpr auto C3 = CSizeT<3> {};
 
         const SizeT LSize = mRRange->sub(1)->size();
         const SizeT blocks = mSRange->size();
 
-        Vector<Double> data(LSize*blocks);
+        MArray<Double> data(mRRange->sub(1)*mSRange);
         for(SizeT i = 0; i != data.size(); ++i){
-            data[i] = static_cast<Double>(LSize*myrank*blocks+i);
+            data.data()[i] = static_cast<Double>(LSize*myrank*blocks+i);
         }
 
         *rgj = 0;
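
For reference, a minimal sketch of how the shared setupBuffer declared above can be driven from caller code, which is what RCArray<T>::load and the Setbuf_Test both do after this commit. The wrapper function, its name, and the header paths below are illustrative assumptions, not part of the commit; only setupBuffer's signature and behavior are taken from the diff. An initialized MPI environment is assumed.

    // Sketch only: header names are assumptions, not taken from this diff.
    #include "cnorxz.h"
    #include "cnorxz_mpi.h"

    using namespace CNORXZ;
    using namespace CNORXZ::mpi;

    // Hypothetical caller: gather the blocks addressed by imap into buf and
    // build a position -> pointer map over them.
    template <class TarI, class RTarI, class SrcI, class RSrcI, typename T>
    Vector<const T*> gatherBlocks(const Sptr<RIndex<TarI,RTarI>>& rgj,
                                  const Sptr<RIndex<SrcI,RSrcI>>& rgi,
                                  const Sptr<Vector<SizeT>>& imap,
                                  const CArrayBase<T>& data,
                                  Vector<T>& buf, const SizeT blocks)
    {
        Vector<const T*> map;
        // setupBuffer exchanges the remotely owned blocks via MPI_Sendrecv,
        // resizes buf to hold them, and lets each map entry point either into
        // buf (remote data) or directly into data (locally owned positions).
        setupBuffer(rgj, rgi, imap, data, buf, map, blocks);
        return map;
    }

After the call, map has one entry per global target position (rgj->range()->size()), and every assigned entry points at a run of `blocks` contiguous values of T; buf must stay alive as long as map is used.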