diff --git a/src/include/xpr/acc_xpr.cc.h b/src/include/xpr/acc_xpr.cc.h new file mode 100644 index 0000000..901bde2 --- /dev/null +++ b/src/include/xpr/acc_xpr.cc.h @@ -0,0 +1,70 @@ + +#ifndef __cxz_acc_xpr_cc_h__ +#define __cxz_acc_xpr_cc_h__ + +#include "acc_xpr.h" + +namespace CNORXZ +{ + template + constexpr AccXpr::AccXpr(SizeT n, const IndexId& id, + const Xpr& xpr, F&& f) : + mN(n), + mId(id), + mXpr(xpr), + mExt(mXpr.rootSteps(mId)), + mF(std::forward(f)) + {} + + template + template + inline decltype(auto) AccXpr::operator()(const PosT& last) const + { + if constexpr(std::is_same::type,NoF>::value){ + const auto pos = last + mExt( UPos(mN) ); + mXpr(pos); + return None {}; + } + else { + typedef typename + std::remove_reference::type OutT; + auto o = OutT(); + const auto pos = last + mExt( UPos(mN) ); + mF(o, mXpr(pos)); + return o; + } + } + + template + inline decltype(auto) AccXpr::operator()() const + { + if constexpr(std::is_same::type,NoF>::value){ + const auto pos = mExt( UPos(mN) ); + mXpr(pos); + return None {}; + } + else { + typedef typename + std::remove_reference::type OutT; + auto o = OutT(); + const auto pos = mExt( UPos(mN) ); + mF(o, mXpr(pos)); + return o; + } + } + + template + template + inline decltype(auto) AccXpr::rootSteps(const IndexId& id) const + { + return mXpr.rootSteps(id); + } + + template + constexpr decltype(auto) accxpr(SizeT n, const IndexId& id, const Xpr& xpr, F&& f) + { + return AccXpr(n, id, xpr, std::forward(f)); + } +} + +#endif diff --git a/src/include/xpr/acc_xpr.h b/src/include/xpr/acc_xpr.h new file mode 100644 index 0000000..be41f67 --- /dev/null +++ b/src/include/xpr/acc_xpr.h @@ -0,0 +1,41 @@ + +// rank access expression, fix rank position to current rank + +#ifndef __cxz_acc_xpr_h__ +#define __cxz_acc_xpr_h__ + +//#include "base/base.h" +#include "xpr_base.h" + +namespace CNORXZ +{ + template + class AccXpr : public XprInterface> + { + public: + DEFAULT_MEMBERS(AccXpr); + + constexpr AccXpr(SizeT n, const 
IndexId& id, const Xpr& xpr, F&& f); + + template + inline decltype(auto) operator()(const PosT& last) const; + + inline decltype(auto) operator()() const; + + template + inline decltype(auto) rootSteps(const IndexId& id) const; + + private: + SizeT mN = 0; + IndexId mId; + Xpr mXpr; + typedef decltype(mXpr.rootSteps(mId)) XPosT; + XPosT mExt; + F mF; + }; + + template + constexpr decltype(auto) accxpr(SizeT n, const IndexId& id, const Xpr& xpr, F&& f); +} + +#endif diff --git a/src/include/xpr/racc_xpr.cc.h b/src/include/xpr/racc_xpr.cc.h deleted file mode 100644 index 0326baf..0000000 --- a/src/include/xpr/racc_xpr.cc.h +++ /dev/null @@ -1,73 +0,0 @@ - -#ifndef __cxz_racc_xpr_cc_h__ -#define __cxz_racc_xpr_cc_h__ - -#include "racc_xpr.h" - -namespace CNOXRZ -{ - namespace mpi - { - template - constexpr AccXpr::AccXpr(SizeT n, const IndexId& id, - const Xpr& xpr, F&& f) : - mN(n), - mId(id), - mXpr(xpr), - mExt(mXpr.rootSteps(mId)), - mF(std::forward(f)) - {} - - template - template - inline decltype(auto) AccXpr::operator()(const PosT& last) const - { - if constexpr(std::is_same::type,NoF>::value){ - const auto pos = last + mExt( UPos(mN) ); - mXpr(pos); - return None {}; - } - else { - typedef typename - std::remove_reference::type OutT; - auto o = OutT(); - const auto pos = last + mExt( UPos(mN) ); - mF(o, mXpr(pos)); - return o; - } - } - - template - inline decltype(auto) AccXpr::operator()() const - { - if constexpr(std::is_same::type,NoF>::value){ - const auto pos = mExt( UPos(mN) ); - mXpr(pos); - return None {}; - } - else { - typedef typename - std::remove_reference::type OutT; - auto o = OutT(); - const auto pos = mExt( UPos(mN) ); - mF(o, mXpr(pos)); - return o; - } - } - - template - template - inline decltype(auto) AccXpr::rootSteps(const IndexId& id) const - { - return mXpr.rootSteps(id); - } - - template - constexpr decltype(auto) accxpr(SizeT n, const IndexId& id, const Xpr& xpr, F&& f) - { - return AccXpr(size, id, xpr, std::forward(f)); - } - } 
-} - -#endif diff --git a/src/include/xpr/racc_xpr.h b/src/include/xpr/racc_xpr.h deleted file mode 100644 index 66ce09a..0000000 --- a/src/include/xpr/racc_xpr.h +++ /dev/null @@ -1,44 +0,0 @@ - -// rank access expression, fix rank position to current rank - -#ifndef __cxz_racc_xpr_h__ -#define __cxz_racc_xpr_h__ - -#include "mpi_base.h" - -namespace CNORXZ -{ - namespace mpi - { - - template - class AccXpr : public XprInterface> - { - public: - DEFAULT_MEMBERS(AccXpr); - - constexpr AccXpr(SizeT n, const IndexId& id, const Xpr& xpr, F&& f); - - template - inline decltype(auto) operator()(const PosT& last) const; - - inline decltype(auto) operator()() const; - - template - inline decltype(auto) rootSteps(const IndexId& id) const; - - private: - SizeT mN = 0; - IndexId mId; - Xpr mXpr; - typedef decltype(mXpr.rootSteps(mId)) XPosT; - XPosT mExt; - F mF; - }; - - template - constexpr decltype(auto) accxpr(SizeT n, const IndexId& id, const Xpr& xpr, F&& f); - } -} - -#endif diff --git a/src/include/xpr/xpr.cc.h b/src/include/xpr/xpr.cc.h index dab4606..1f57824 100644 --- a/src/include/xpr/xpr.cc.h +++ b/src/include/xpr/xpr.cc.h @@ -16,3 +16,4 @@ #include "index_id.cc.h" #include "func.cc.h" #include "map_xpr.cc.h" +#include "acc_xpr.cc.h" diff --git a/src/include/xpr/xpr.h b/src/include/xpr/xpr.h index e6fe804..b40dbeb 100644 --- a/src/include/xpr/xpr.h +++ b/src/include/xpr/xpr.h @@ -16,5 +16,6 @@ #include "index_id.h" #include "func.h" #include "map_xpr.h" +#include "acc_xpr.h" #include "xpr.cc.h" diff --git a/src/opt/mpi/include/rarray.cc.h b/src/opt/mpi/include/rarray.cc.h index dec610c..d64b306 100644 --- a/src/opt/mpi/include/rarray.cc.h +++ b/src/opt/mpi/include/rarray.cc.h @@ -169,6 +169,7 @@ namespace CNORXZ inline decltype(auto) RCArray::operator()(const DPack& pack) const { // TODO: assert that none of the indices is rank index + CXZ_ERROR("not implemented"); return (*mA)(pack); } @@ -361,6 +362,7 @@ namespace CNORXZ inline decltype(auto) 
RArray::operator()(const DPack& pack) const { // TODO: assert that none of the indices is rank index + CXZ_ERROR("not implemented"); return (*mB)(pack); } @@ -507,14 +509,13 @@ namespace CNORXZ } // Third loop: Assign map to target buffer positions: - const SizeT myrankoff = myrank*locsz; assert(mapsize == Nranks*locsz); Vector cnt(Nranks); mi->ifor( operation ( [&](SizeT p) { const SizeT r = p / locsz; const SizeT l = p % locsz; - const SizeT mpidx = (p - myrankoff + mapsize) % mapsize; + const SizeT mpidx = p; if(myrank != r and required[p]){ SizeT off = 0; for(SizeT s = 0; s != r; ++s){ @@ -524,7 +525,7 @@ namespace CNORXZ ++cnt[r]; } if(myrank == r){ - assert(mpidx < locsz); + assert(mpidx < (myrank+1)*locsz); map[mpidx] = data.data() + l*blocks; } } , posop(mi) ), NoF {} )(); diff --git a/src/opt/mpi/include/rmap_xpr.cc.h b/src/opt/mpi/include/rmap_xpr.cc.h index 785db32..98e4715 100644 --- a/src/opt/mpi/include/rmap_xpr.cc.h +++ b/src/opt/mpi/include/rmap_xpr.cc.h @@ -23,23 +23,26 @@ namespace CNORXZ const Sptr& si, const F& f, const Sptr>& m) { + // This was the old shift, keep it here as comment if we want to introduce other shifts + // in order to reduce memory consumption by the maps; + // remember to invert the shift in the map xpr BEFORE calling the map! 
+ //const SizeT locsz = tix.local()->pmax().val(); + //const SizeT tarsize = locsz*mpi::getNumRanks(); + //const SizeT idx = (tix.pos() - locsz*myrank + tarsize) % tarsize; + auto six = *si; auto sie = si->range()->end(); auto tix = *ti; - const SizeT locsz = tix.local()->pmax().val(); - const SizeT tarsize = locsz*mpi::getNumRanks(); const SizeT mapsize = m->size(); const SizeT myrank = mpi::getRankNumber(); if constexpr(mpi::is_rank_index::value){ - CXZ_ASSERT(mapsize == six.local()->pmax().val(), "map not well-formatted: size = " - << mapsize << ", expected " << six.local()->pmax().val()); + CXZ_ASSERT(mapsize == six.pmax().val(), "map not well-formatted: size = " + << mapsize << ", expected " << six.pmax().val()); for(six = 0; six != sie; ++six){ tix.at( f(*six) ); if(six.rank() == myrank){ - //const SizeT idx = (tix.pos() - locsz*tix.rank() + tarsize) % tarsize; - const SizeT idx = (tix.pos() - locsz*myrank + tarsize) % tarsize; - //const SizeT idx = tix.pos(); - (*m)[six.local()->pos()] = idx; + const SizeT idx = tix.pos(); + (*m)[six.pos()] = idx; } } } @@ -48,9 +51,7 @@ namespace CNORXZ << mapsize << ", expected " << six.pmax().val()); for(six = 0; six != sie; ++six){ tix.at( f(*six) ); - //const SizeT idx = (tix.pos() - locsz*tix.rank() + tarsize) % tarsize; - const SizeT idx = (tix.pos() - locsz*myrank + tarsize) % tarsize; - //const SizeT idx = tix.pos() + const SizeT idx = tix.pos(); /* position of tix without the former rank shift */ (*m)[six.pos()] = idx; } } @@ -62,13 +63,7 @@ namespace CNORXZ const Sptr& si, const F& f) { - SizeT mapsize = 0; - if constexpr(mpi::is_rank_index::value){ - mapsize = si->local()->lmax().val(); - } - else { - mapsize = si->lmax().val(); - } + const SizeT mapsize = si->pmax().val(); auto o = std::make_shared>(mapsize); setup(ti,si,f,o); return o; diff --git a/src/opt/mpi/include/rop_types.cc.h b/src/opt/mpi/include/rop_types.cc.h index a1a3613..56010cb 100644 --- a/src/opt/mpi/include/rop_types.cc.h +++ b/src/opt/mpi/include/rop_types.cc.h @@ -29,12 +29,16 @@ namespace CNORXZ template constexpr decltype(auto)
CROpRoot::operator()(const PosT& pos) const { + //CXZ_ASSERT(pos.val() < mRIndex->pmax().val(), pos.val() << ">=" << mRIndex->pmax().val()); + //CXZ_ASSERT(mData[pos.val()] != nullptr, "data[" << pos.val() << "] == null"); + //CXZ_ASSERT(pos.next().val() < mIndex->pmax().val(), pos.val() << ">=" << mIndex->pmax().val()); return (mData[pos.val()])[pos.next().val()]; } template constexpr decltype(auto) CROpRoot::operator()() const { + //CXZ_ASSERT(mData[0] != nullptr, "data[" << 0 << "] == null"); return (mData[0])[0]; } @@ -61,11 +65,9 @@ namespace CNORXZ const Sptr& li) : mLocal(&a.local()), mData(a.buffermap().data()), - //mData(a.data()), mRIndex(ri), mIndex(li) { - //CXZ_ERROR("nope"); CXZ_ASSERT(a.buffermap().size() == ri->lmax().val(), "data map not properly initialized: map size = " << a.buffermap().size() << ", rank index range size = " << ri->lmax().val()); @@ -75,8 +77,8 @@ namespace CNORXZ template constexpr ROpRoot& ROpRoot::operator=(const Op& in) { - (*mLocal)(mindexPtr(mRIndex->local()*mIndex)) = in; - //OI::a(mIndex, [](auto& a, const auto& b) { a = b; }, in); + (*mLocal)(mindexPtr(mRIndex->local()*mIndex)).a + (mindexPtr(mRIndex*mIndex),[](auto& a, const auto& b) { a = b; }, in); return *this; } @@ -84,16 +86,16 @@ namespace CNORXZ template constexpr ROpRoot& ROpRoot::operator+=(const Op& in) { - (*mLocal)(mindexPtr(mRIndex->local()*mIndex)) += in; - //OI::a(mIndex, [](auto& a, const auto& b) { a += b; }, in); + (*mLocal)(mindexPtr(mRIndex->local()*mIndex)).a + (mindexPtr(mRIndex*mIndex),[](auto& a, const auto& b) { a += b; }, in); return *this; } template constexpr ROpRoot& ROpRoot::operator=(const ROpRoot& in) { - (*mLocal)(mindexPtr(mRIndex->local()*mIndex)) = in; - //OI::a(mIndex, [](auto& a, const auto& b) { a = b; }, in); + (*mLocal)(mindexPtr(mRIndex->local()*mIndex)).a + (mindexPtr(mRIndex*mIndex),[](auto& a, const auto& b) { a = b; }, in); return *this; } @@ -101,12 +103,16 @@ namespace CNORXZ template constexpr decltype(auto) 
ROpRoot::operator()(const PosT& pos) const { + //CXZ_ASSERT(pos.val() < mRIndex->pmax().val(), pos.val() << ">=" << mRIndex->pmax().val()); + //CXZ_ASSERT(mData[pos.val()] != nullptr, "data[" << pos.val() << "] == null"); + //CXZ_ASSERT(pos.next().val() < mIndex->pmax().val(), pos.val() << ">=" << mIndex->pmax().val()); return (mData[pos.val()])[pos.next().val()]; } template constexpr decltype(auto) ROpRoot::operator()() const { + //CXZ_ASSERT(mData[0] != nullptr, "data[" << 0 << "] == null"); return (mData[0])[0]; } diff --git a/src/opt/mpi/include/rop_types.h b/src/opt/mpi/include/rop_types.h index 5415182..2c44259 100644 --- a/src/opt/mpi/include/rop_types.h +++ b/src/opt/mpi/include/rop_types.h @@ -116,6 +116,12 @@ namespace CNORXZ static constexpr SizeT value = 2; }; + template + struct op_size> + { + static constexpr SizeT value = 2; + }; + } // namespace CNORXZ #endif diff --git a/src/opt/mpi/include/rrange.cc.h b/src/opt/mpi/include/rrange.cc.h index d67af1f..de9c168 100644 --- a/src/opt/mpi/include/rrange.cc.h +++ b/src/opt/mpi/include/rrange.cc.h @@ -231,8 +231,7 @@ namespace CNORXZ if constexpr(I != 0){ return SPos<0> {}; } else { return UPos(id == this->id() ? 
1 : 0); } }; - return mI->stepSize(id) + own(); - //return getRankStepSize(id); + return mK->stepSize(id) * mI->pmax() * UPos(mRankFormat) + mI->stepSize(id) + own(); } template @@ -330,7 +329,8 @@ namespace CNORXZ template constexpr decltype(auto) RIndex::ifor(const Xpr& xpr, F&& f) const { - return mI->ifor(xpr, std::forward(f)); + return accxpr( mpi::getRankNumber(), mK->id(), mI->ifor(xpr, std::forward(f)), + NoF {}); } template diff --git a/src/opt/mpi/tests/roperation_unit_test.cc b/src/opt/mpi/tests/roperation_unit_test.cc index 917758d..8c57ea0 100644 --- a/src/opt/mpi/tests/roperation_unit_test.cc +++ b/src/opt/mpi/tests/roperation_unit_test.cc @@ -138,11 +138,10 @@ namespace (std::get<2>(vec)+1)%L, (std::get<3>(vec)+1)%L); } ); Vector req(xp->range()->size(), false); for(const auto& r: *imap1){ - req[(r+mpi::getRankNumber()*16*12*12*12/4)%req.size()] = true; + req[r] = true; } res.load(x, AB, req); // DUMMY, not used... mM1.load(xp, AB, req); - //res.rop(x*A*B) = mapXpr(xp,x,imap1, mM1(xp*A*B) - mM1(x*A*B) ); res(x*A*B) = mapXpr(xp,x,imap1, mM1(xp*A*B) - mM1(x*A*B) ); for(SizeT x0 = 0; x0 != T; ++x0) { diff --git a/src/opt/mpi/tests/setbuf_unit_test.cc b/src/opt/mpi/tests/setbuf_unit_test.cc index 94be2a3..b977eb1 100644 --- a/src/opt/mpi/tests/setbuf_unit_test.cc +++ b/src/opt/mpi/tests/setbuf_unit_test.cc @@ -119,9 +119,9 @@ namespace setupBuffer(rgi, req, data, buf, map, mSRange->size()); EXPECT_EQ(mRRange->sub(1)->size(), 16*12*12*12/4); - const SizeT locsz = rgj->local()->lmax().val(); - const SizeT myrankoff = myrank*locsz; - const SizeT mapsize = map.size(); + //const SizeT locsz = rgj->local()->lmax().val(); + //const SizeT myrankoff = myrank*locsz; + //const SizeT mapsize = map.size(); // Fourth loop: Check: for(*rgi = 0, gi = 0; rgi->lex() != rgi->lmax().val(); ++*rgi, ++gi){ gj = gi.lex(); @@ -132,7 +132,8 @@ namespace *rgj = gj.lex(); if(rgi->rank() == myrank){ - const SizeT mpidx = (rgj->pos() - myrankoff + mapsize) % mapsize; + const SizeT 
mpidx = rgj->pos(); + //const SizeT mpidx = (rgj->pos() - myrankoff + mapsize) % mapsize; EXPECT_TRUE(map.data()[mpidx] != nullptr); const Double vn = *map[mpidx]/blocks;