mpi: completely remove the rank shift when doing global operations + fixes
This commit is contained in:
parent
0ee2e0fca2
commit
31e892005b
13 changed files with 158 additions and 154 deletions
70
src/include/xpr/acc_xpr.cc.h
Normal file
70
src/include/xpr/acc_xpr.cc.h
Normal file
|
@ -0,0 +1,70 @@
|
|||
|
||||
#ifndef __cxz_acc_xpr_cc_h__
|
||||
#define __cxz_acc_xpr_cc_h__
|
||||
|
||||
#include "acc_xpr.h"
|
||||
|
||||
namespace CNORXZ
|
||||
{
|
||||
template <SizeT L, class Xpr, class F>
|
||||
constexpr AccXpr<L,Xpr,F>::AccXpr(SizeT n, const IndexId<L>& id,
|
||||
const Xpr& xpr, F&& f) :
|
||||
mN(n),
|
||||
mId(id),
|
||||
mXpr(xpr),
|
||||
mExt(mXpr.rootSteps(mId)),
|
||||
mF(std::forward<F>(f))
|
||||
{}
|
||||
|
||||
template <SizeT L, class Xpr, class F>
|
||||
template <class PosT>
|
||||
inline decltype(auto) AccXpr<L,Xpr,F>::operator()(const PosT& last) const
|
||||
{
|
||||
if constexpr(std::is_same<typename std::remove_reference<F>::type,NoF>::value){
|
||||
const auto pos = last + mExt( UPos(mN) );
|
||||
mXpr(pos);
|
||||
return None {};
|
||||
}
|
||||
else {
|
||||
typedef typename
|
||||
std::remove_reference<decltype(mXpr(last + mExt( UPos(0) )))>::type OutT;
|
||||
auto o = OutT();
|
||||
const auto pos = last + mExt( UPos(mN) );
|
||||
mF(o, mXpr(pos));
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
template <SizeT L, class Xpr, class F>
|
||||
inline decltype(auto) AccXpr<L,Xpr,F>::operator()() const
|
||||
{
|
||||
if constexpr(std::is_same<typename std::remove_reference<F>::type,NoF>::value){
|
||||
const auto pos = mExt( UPos(mN) );
|
||||
mXpr(pos);
|
||||
return None {};
|
||||
}
|
||||
else {
|
||||
typedef typename
|
||||
std::remove_reference<decltype(mXpr( mExt( UPos(0) )))>::type OutT;
|
||||
auto o = OutT();
|
||||
const auto pos = mExt( UPos(mN) );
|
||||
mF(o, mXpr(pos));
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
template <SizeT L, class Xpr, class F>
|
||||
template <SizeT I>
|
||||
inline decltype(auto) AccXpr<L,Xpr,F>::rootSteps(const IndexId<I>& id) const
|
||||
{
|
||||
return mXpr.rootSteps(id);
|
||||
}
|
||||
|
||||
template <SizeT L, class Xpr, class F>
|
||||
constexpr decltype(auto) accxpr(SizeT n, const IndexId<L>& id, const Xpr& xpr, F&& f)
|
||||
{
|
||||
return AccXpr<L,Xpr,F>(n, id, xpr, std::forward<F>(f));
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
41
src/include/xpr/acc_xpr.h
Normal file
41
src/include/xpr/acc_xpr.h
Normal file
|
@ -0,0 +1,41 @@
|
|||
|
||||
// access expression: evaluates the wrapped expression at a fixed position n of the index with the given id (e.g. the rank index fixed to the current rank)
|
||||
|
||||
#ifndef __cxz_acc_xpr_h__
|
||||
#define __cxz_acc_xpr_h__
|
||||
|
||||
//#include "base/base.h"
|
||||
#include "xpr_base.h"
|
||||
|
||||
namespace CNORXZ
|
||||
{
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
class AccXpr : public XprInterface<AccXpr<L,Xpr,F>>
|
||||
{
|
||||
public:
|
||||
DEFAULT_MEMBERS(AccXpr);
|
||||
|
||||
constexpr AccXpr(SizeT n, const IndexId<L>& id, const Xpr& xpr, F&& f);
|
||||
|
||||
template <class PosT>
|
||||
inline decltype(auto) operator()(const PosT& last) const;
|
||||
|
||||
inline decltype(auto) operator()() const;
|
||||
|
||||
template <SizeT I>
|
||||
inline decltype(auto) rootSteps(const IndexId<I>& id) const;
|
||||
|
||||
private:
|
||||
SizeT mN = 0;
|
||||
IndexId<L> mId;
|
||||
Xpr mXpr;
|
||||
typedef decltype(mXpr.rootSteps(mId)) XPosT;
|
||||
XPosT mExt;
|
||||
F mF;
|
||||
};
|
||||
|
||||
// Factory for AccXpr<L,Xpr,F>: forwards (n, id, xpr, f) to the AccXpr constructor.
// Declaration only; the definition is provided in acc_xpr.cc.h.
template <SizeT L, class Xpr, class F = NoF>
|
||||
constexpr decltype(auto) accxpr(SizeT n, const IndexId<L>& id, const Xpr& xpr, F&& f);
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,73 +0,0 @@
|
|||
|
||||
#ifndef __cxz_racc_xpr_cc_h__
|
||||
#define __cxz_racc_xpr_cc_h__
|
||||
|
||||
#include "racc_xpr.h"
|
||||
|
||||
namespace CNOXRZ
|
||||
{
|
||||
namespace mpi
|
||||
{
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
constexpr AccXpr<L,Xpr,F>::AccXpr(SizeT n, const IndexId<L>& id,
|
||||
const Xpr& xpr, F&& f) :
|
||||
mN(n),
|
||||
mId(id),
|
||||
mXpr(xpr),
|
||||
mExt(mXpr.rootSteps(mId)),
|
||||
mF(std::forward<F>(f))
|
||||
{}
|
||||
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
template <class PosT>
|
||||
inline decltype(auto) AccXpr<L,Xpr,F>::operator()(const PosT& last) const
|
||||
{
|
||||
if constexpr(std::is_same<typename std::remove_reference<F>::type,NoF>::value){
|
||||
const auto pos = last + mExt( UPos(mN) );
|
||||
mXpr(pos);
|
||||
return None {};
|
||||
}
|
||||
else {
|
||||
typedef typename
|
||||
std::remove_reference<decltype(mXpr(last + mExt( UPos(0) )))>::type OutT;
|
||||
auto o = OutT();
|
||||
const auto pos = last + mExt( UPos(mN) );
|
||||
mF(o, mXpr(pos));
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
inline decltype(auto) AccXpr<L,Xpr,F>::operator()() const
|
||||
{
|
||||
if constexpr(std::is_same<typename std::remove_reference<F>::type,NoF>::value){
|
||||
const auto pos = mExt( UPos(mN) );
|
||||
mXpr(pos);
|
||||
return None {};
|
||||
}
|
||||
else {
|
||||
typedef typename
|
||||
std::remove_reference<decltype(mXpr(last + mExt( UPos(0) )))>::type OutT;
|
||||
auto o = OutT();
|
||||
const auto pos = mExt( UPos(mN) );
|
||||
mF(o, mXpr(pos));
|
||||
return o;
|
||||
}
|
||||
}
|
||||
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
template <SizeT I>
|
||||
inline decltype(auto) AccXpr<L,Xpr,F>::rootSteps(const IndexId<I>& id) const
|
||||
{
|
||||
return mXpr.rootSteps(id);
|
||||
}
|
||||
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
constexpr decltype(auto) accxpr(SizeT n, const IndexId<L>& id, const Xpr& xpr, F&& f)
|
||||
{
|
||||
return AccXpr<L,Xpr,F>(size, id, xpr, std::forward<F>(f));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -1,44 +0,0 @@
|
|||
|
||||
// rank access expression, fix rank position to current rank
|
||||
|
||||
#ifndef __cxz_racc_xpr_h__
|
||||
#define __cxz_racc_xpr_h__
|
||||
|
||||
#include "mpi_base.h"
|
||||
|
||||
namespace CNORXZ
|
||||
{
|
||||
namespace mpi
|
||||
{
|
||||
|
||||
template <SizeT L, class Xpr, class F = NoF>
|
||||
class AccXpr : public XprInterface<AccXpr<L,Xpr,F>>
|
||||
{
|
||||
public:
|
||||
DEFAULT_MEMBERS(AccXpr);
|
||||
|
||||
constexpr AccXpr(SizeT n, const IndexId<L>& id, const Xpr& xpr, F&& f);
|
||||
|
||||
template <class PosT>
|
||||
inline decltype(auto) operator()(const PosT& last) const;
|
||||
|
||||
inline decltype(auto) operator()() const;
|
||||
|
||||
template <SizeT I>
|
||||
inline decltype(auto) rootSteps(const IndexId<I>& id) const;
|
||||
|
||||
private:
|
||||
SizeT mN = 0;
|
||||
IndexId<L> mId;
|
||||
Xpr mXpr;
|
||||
typedef decltype(mXpr.rootSteps(mId)) XPosT;
|
||||
XPosT mExt;
|
||||
F mF;
|
||||
};
|
||||
|
||||
// Factory for mpi::AccXpr<L,Xpr,F>: forwards (n, id, xpr, f) to the AccXpr constructor.
// Declaration only; the definition is provided in racc_xpr.cc.h.
template <SizeT L, class Xpr, class F = NoF>
|
||||
constexpr decltype(auto) accxpr(SizeT n, const IndexId<L>& id, const Xpr& xpr, F&& f);
|
||||
}
|
||||
}
|
||||
|
||||
#endif
|
|
@ -16,3 +16,4 @@
|
|||
#include "index_id.cc.h"
|
||||
#include "func.cc.h"
|
||||
#include "map_xpr.cc.h"
|
||||
#include "acc_xpr.cc.h"
|
||||
|
|
|
@ -16,5 +16,6 @@
|
|||
#include "index_id.h"
|
||||
#include "func.h"
|
||||
#include "map_xpr.h"
|
||||
#include "acc_xpr.h"
|
||||
|
||||
#include "xpr.cc.h"
|
||||
|
|
|
@ -169,6 +169,7 @@ namespace CNORXZ
|
|||
inline decltype(auto) RCArray<T>::operator()(const DPack& pack) const
|
||||
{
|
||||
// TODO: assert that none of the indices is rank index
|
||||
CXZ_ERROR("not implemented");
|
||||
return (*mA)(pack);
|
||||
}
|
||||
|
||||
|
@ -361,6 +362,7 @@ namespace CNORXZ
|
|||
inline decltype(auto) RArray<T>::operator()(const DPack& pack) const
|
||||
{
|
||||
// TODO: assert that none of the indices is rank index
|
||||
CXZ_ERROR("not implemented");
|
||||
return (*mB)(pack);
|
||||
}
|
||||
|
||||
|
@ -507,14 +509,13 @@ namespace CNORXZ
|
|||
}
|
||||
|
||||
// Third loop: Assign map to target buffer positions:
|
||||
const SizeT myrankoff = myrank*locsz;
|
||||
assert(mapsize == Nranks*locsz);
|
||||
Vector<SizeT> cnt(Nranks);
|
||||
mi->ifor( operation
|
||||
( [&](SizeT p) {
|
||||
const SizeT r = p / locsz;
|
||||
const SizeT l = p % locsz;
|
||||
const SizeT mpidx = (p - myrankoff + mapsize) % mapsize;
|
||||
const SizeT mpidx = p;
|
||||
if(myrank != r and required[p]){
|
||||
SizeT off = 0;
|
||||
for(SizeT s = 0; s != r; ++s){
|
||||
|
@ -524,7 +525,7 @@ namespace CNORXZ
|
|||
++cnt[r];
|
||||
}
|
||||
if(myrank == r){
|
||||
assert(mpidx < locsz);
|
||||
assert(mpidx < (myrank+1)*locsz);
|
||||
map[mpidx] = data.data() + l*blocks;
|
||||
}
|
||||
} , posop(mi) ), NoF {} )();
|
||||
|
|
|
@ -23,23 +23,26 @@ namespace CNORXZ
|
|||
const Sptr<SrcIndex>& si,
|
||||
const F& f, const Sptr<Vector<SizeT>>& m)
|
||||
{
|
||||
// This was the old shift, keep it here as comment if we want to introduce other shifts
|
||||
// in order to reduce memory consumption by the maps;
|
||||
// remember to invert the shift in the map xpr BEFORE calling the map!
|
||||
//const SizeT locsz = tix.local()->pmax().val();
|
||||
//const SizeT tarsize = locsz*mpi::getNumRanks();
|
||||
//const SizeT idx = (tix.pos() - locsz*myrank + tarsize) % tarsize;
|
||||
|
||||
auto six = *si;
|
||||
auto sie = si->range()->end();
|
||||
auto tix = *ti;
|
||||
const SizeT locsz = tix.local()->pmax().val();
|
||||
const SizeT tarsize = locsz*mpi::getNumRanks();
|
||||
const SizeT mapsize = m->size();
|
||||
const SizeT myrank = mpi::getRankNumber();
|
||||
if constexpr(mpi::is_rank_index<SrcIndex>::value){
|
||||
CXZ_ASSERT(mapsize == six.local()->pmax().val(), "map not well-formatted: size = "
|
||||
// The map is indexed by the global source position (six.pos()), so its size is
// compared against the global pmax; the message reports that same value.
CXZ_ASSERT(mapsize == six.pmax().val(), "map not well-formatted: size = "
	   << mapsize << ", expected " << six.pmax().val());
|
||||
for(six = 0; six != sie; ++six){
|
||||
tix.at( f(*six) );
|
||||
if(six.rank() == myrank){
|
||||
//const SizeT idx = (tix.pos() - locsz*tix.rank() + tarsize) % tarsize;
|
||||
const SizeT idx = (tix.pos() - locsz*myrank + tarsize) % tarsize;
|
||||
//const SizeT idx = tix.pos();
|
||||
(*m)[six.local()->pos()] = idx;
|
||||
const SizeT idx = tix.pos();
|
||||
(*m)[six.pos()] = idx;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -48,9 +51,7 @@ namespace CNORXZ
|
|||
<< mapsize << ", expected " << six.pmax().val());
|
||||
for(six = 0; six != sie; ++six){
|
||||
tix.at( f(*six) );
|
||||
//const SizeT idx = (tix.pos() - locsz*tix.rank() + tarsize) % tarsize;
|
||||
const SizeT idx = (tix.pos() - locsz*myrank + tarsize) % tarsize;
|
||||
//const SizeT idx = tix.pos()
|
||||
// unshifted target position; stored as-is in the map entry below
const SizeT idx = tix.pos();
|
||||
(*m)[six.pos()] = idx;
|
||||
}
|
||||
}
|
||||
|
@ -62,13 +63,7 @@ namespace CNORXZ
|
|||
const Sptr<SrcIndex>& si,
|
||||
const F& f)
|
||||
{
|
||||
SizeT mapsize = 0;
|
||||
if constexpr(mpi::is_rank_index<SrcIndex>::value){
|
||||
mapsize = si->local()->lmax().val();
|
||||
}
|
||||
else {
|
||||
mapsize = si->lmax().val();
|
||||
}
|
||||
const SizeT mapsize = si->pmax().val();
|
||||
auto o = std::make_shared<Vector<SizeT>>(mapsize);
|
||||
setup(ti,si,f,o);
|
||||
return o;
|
||||
|
|
|
@ -29,12 +29,16 @@ namespace CNORXZ
|
|||
template <class PosT>
|
||||
constexpr decltype(auto) CROpRoot<T,RIndexT,IndexT>::operator()(const PosT& pos) const
|
||||
{
|
||||
//CXZ_ASSERT(pos.val() < mRIndex->pmax().val(), pos.val() << ">=" << mRIndex->pmax().val());
|
||||
//CXZ_ASSERT(mData[pos.val()] != nullptr, "data[" << pos.val() << "] == null");
|
||||
//CXZ_ASSERT(pos.next().val() < mIndex->pmax().val(), pos.val() << ">=" << mIndex->pmax().val());
|
||||
return (mData[pos.val()])[pos.next().val()];
|
||||
}
|
||||
|
||||
template <typename T, class RIndexT, class IndexT>
|
||||
constexpr decltype(auto) CROpRoot<T,RIndexT,IndexT>::operator()() const
|
||||
{
|
||||
//CXZ_ASSERT(mData[0] != nullptr, "data[" << 0 << "] == null");
|
||||
return (mData[0])[0];
|
||||
}
|
||||
|
||||
|
@ -61,11 +65,9 @@ namespace CNORXZ
|
|||
const Sptr<IndexT>& li) :
|
||||
mLocal(&a.local()),
|
||||
mData(a.buffermap().data()),
|
||||
//mData(a.data()),
|
||||
mRIndex(ri),
|
||||
mIndex(li)
|
||||
{
|
||||
//CXZ_ERROR("nope");
|
||||
CXZ_ASSERT(a.buffermap().size() == ri->lmax().val(),
|
||||
"data map not properly initialized: map size = " << a.buffermap().size()
|
||||
<< ", rank index range size = " << ri->lmax().val());
|
||||
|
@ -75,8 +77,8 @@ namespace CNORXZ
|
|||
template <class Op>
|
||||
constexpr ROpRoot<T,RIndexT,IndexT>& ROpRoot<T,RIndexT,IndexT>::operator=(const Op& in)
|
||||
{
|
||||
(*mLocal)(mindexPtr(mRIndex->local()*mIndex)) = in;
|
||||
//OI::a(mIndex, [](auto& a, const auto& b) { a = b; }, in);
|
||||
(*mLocal)(mindexPtr(mRIndex->local()*mIndex)).a
|
||||
(mindexPtr(mRIndex*mIndex),[](auto& a, const auto& b) { a = b; }, in);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -84,16 +86,16 @@ namespace CNORXZ
|
|||
template <class Op>
|
||||
constexpr ROpRoot<T,RIndexT,IndexT>& ROpRoot<T,RIndexT,IndexT>::operator+=(const Op& in)
|
||||
{
|
||||
(*mLocal)(mindexPtr(mRIndex->local()*mIndex)) += in;
|
||||
//OI::a(mIndex, [](auto& a, const auto& b) { a += b; }, in);
|
||||
(*mLocal)(mindexPtr(mRIndex->local()*mIndex)).a
|
||||
(mindexPtr(mRIndex*mIndex),[](auto& a, const auto& b) { a += b; }, in);
|
||||
return *this;
|
||||
}
|
||||
|
||||
template <typename T, class RIndexT, class IndexT>
|
||||
constexpr ROpRoot<T,RIndexT,IndexT>& ROpRoot<T,RIndexT,IndexT>::operator=(const ROpRoot& in)
|
||||
{
|
||||
(*mLocal)(mindexPtr(mRIndex->local()*mIndex)) = in;
|
||||
//OI::a(mIndex, [](auto& a, const auto& b) { a = b; }, in);
|
||||
(*mLocal)(mindexPtr(mRIndex->local()*mIndex)).a
|
||||
(mindexPtr(mRIndex*mIndex),[](auto& a, const auto& b) { a = b; }, in);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -101,12 +103,16 @@ namespace CNORXZ
|
|||
template <class PosT>
|
||||
constexpr decltype(auto) ROpRoot<T,RIndexT,IndexT>::operator()(const PosT& pos) const
|
||||
{
|
||||
//CXZ_ASSERT(pos.val() < mRIndex->pmax().val(), pos.val() << ">=" << mRIndex->pmax().val());
|
||||
//CXZ_ASSERT(mData[pos.val()] != nullptr, "data[" << pos.val() << "] == null");
|
||||
//CXZ_ASSERT(pos.next().val() < mIndex->pmax().val(), pos.val() << ">=" << mIndex->pmax().val());
|
||||
return (mData[pos.val()])[pos.next().val()];
|
||||
}
|
||||
|
||||
template <typename T, class RIndexT, class IndexT>
|
||||
constexpr decltype(auto) ROpRoot<T,RIndexT,IndexT>::operator()() const
|
||||
{
|
||||
//CXZ_ASSERT(mData[0] != nullptr, "data[" << 0 << "] == null");
|
||||
return (mData[0])[0];
|
||||
}
|
||||
|
||||
|
|
|
@ -116,6 +116,12 @@ namespace CNORXZ
|
|||
static constexpr SizeT value = 2;
|
||||
};
|
||||
|
||||
template <typename T, class RIndexT, class IndexT>
|
||||
struct op_size<mpi::ROpRoot<T,RIndexT,IndexT>>
|
||||
{
|
||||
static constexpr SizeT value = 2;
|
||||
};
|
||||
|
||||
} // namespace CNORXZ
|
||||
|
||||
#endif
|
||||
|
|
|
@ -231,8 +231,7 @@ namespace CNORXZ
|
|||
if constexpr(I != 0){ return SPos<0> {}; }
|
||||
else { return UPos(id == this->id() ? 1 : 0); }
|
||||
};
|
||||
return mI->stepSize(id) + own();
|
||||
//return getRankStepSize(id);
|
||||
return mK->stepSize(id) * mI->pmax() * UPos(mRankFormat) + mI->stepSize(id) + own();
|
||||
}
|
||||
|
||||
template <class IndexI, class IndexK>
|
||||
|
@ -330,7 +329,8 @@ namespace CNORXZ
|
|||
template <class Xpr, class F>
|
||||
constexpr decltype(auto) RIndex<IndexI,IndexK>::ifor(const Xpr& xpr, F&& f) const
|
||||
{
|
||||
return mI->ifor(xpr, std::forward<F>(f));
|
||||
return accxpr( mpi::getRankNumber(), mK->id(), mI->ifor(xpr, std::forward<F>(f)),
|
||||
NoF {});
|
||||
}
|
||||
|
||||
template <class IndexI, class IndexK>
|
||||
|
|
|
@ -138,11 +138,10 @@ namespace
|
|||
(std::get<2>(vec)+1)%L, (std::get<3>(vec)+1)%L); } );
|
||||
Vector<bool> req(xp->range()->size(), false);
|
||||
for(const auto& r: *imap1){
|
||||
req[(r+mpi::getRankNumber()*16*12*12*12/4)%req.size()] = true;
|
||||
req[r] = true;
|
||||
}
|
||||
res.load(x, AB, req); // DUMMY, not used...
|
||||
mM1.load(xp, AB, req);
|
||||
//res.rop(x*A*B) = mapXpr(xp,x,imap1, mM1(xp*A*B) - mM1(x*A*B) );
|
||||
res(x*A*B) = mapXpr(xp,x,imap1, mM1(xp*A*B) - mM1(x*A*B) );
|
||||
|
||||
for(SizeT x0 = 0; x0 != T; ++x0) {
|
||||
|
|
|
@ -119,9 +119,9 @@ namespace
|
|||
setupBuffer(rgi, req, data, buf, map, mSRange->size());
|
||||
|
||||
EXPECT_EQ(mRRange->sub(1)->size(), 16*12*12*12/4);
|
||||
const SizeT locsz = rgj->local()->lmax().val();
|
||||
const SizeT myrankoff = myrank*locsz;
|
||||
const SizeT mapsize = map.size();
|
||||
//const SizeT locsz = rgj->local()->lmax().val();
|
||||
//const SizeT myrankoff = myrank*locsz;
|
||||
//const SizeT mapsize = map.size();
|
||||
// Fourth loop: Check:
|
||||
for(*rgi = 0, gi = 0; rgi->lex() != rgi->lmax().val(); ++*rgi, ++gi){
|
||||
gj = gi.lex();
|
||||
|
@ -132,7 +132,8 @@ namespace
|
|||
*rgj = gj.lex();
|
||||
|
||||
if(rgi->rank() == myrank){
|
||||
const SizeT mpidx = (rgj->pos() - myrankoff + mapsize) % mapsize;
|
||||
const SizeT mpidx = rgj->pos();
|
||||
//const SizeT mpidx = (rgj->pos() - myrankoff + mapsize) % mapsize;
|
||||
EXPECT_TRUE(map.data()[mpidx] != nullptr);
|
||||
|
||||
const Double vn = *map[mpidx]/blocks;
|
||||
|
|
Loading…
Reference in a new issue