From ad4c0f177e738d0839a1754dd89871fd889336a5 Mon Sep 17 00:00:00 2001 From: Christian Zimmermann Date: Mon, 6 May 2024 02:36:56 +0200 Subject: [PATCH] im com --- src/opt/mpi/include/rarray.cc.h | 54 +++++++++++++++--- src/opt/mpi/include/rarray.h | 17 +++++- src/opt/mpi/include/rmap_xpr.cc.h | 48 ++++++++++++---- src/opt/mpi/include/rmap_xpr.h | 11 ++-- src/opt/mpi/include/rop_types.cc.h | 35 +++++++++--- src/opt/mpi/include/rop_types.h | 18 +++++- src/opt/mpi/tests/roperation_unit_test.cc | 68 ++++++++++++++++++++--- src/opt/mpi/tests/setbuf_unit_test.cc | 15 +++-- 8 files changed, 220 insertions(+), 46 deletions(-) diff --git a/src/opt/mpi/include/rarray.cc.h b/src/opt/mpi/include/rarray.cc.h index 4366b8c..a3fadb0 100644 --- a/src/opt/mpi/include/rarray.cc.h +++ b/src/opt/mpi/include/rarray.cc.h @@ -331,20 +331,20 @@ namespace CNORXZ template template - OpRoot RArray::operator()(const Sptr& i) + COpRoot RArray::operator()(const Sptr& i) const { CXZ_ERROR("not implemented"); - return OpRoot(); + return COpRoot(); } template template - inline decltype(auto) RArray::operator()(const SPack& pack) + inline decltype(auto) RArray::operator()(const SPack& pack) const { typedef typename std::remove_reference{}])>::type I0; if constexpr(is_rank_index::value){ // preliminary: - CXZ_ASSERT(this->formatIsTrivial(), + CXZ_ASSERT(mB->formatIsTrivial(), "array has non-trivial format, rank operations require trivial format"); auto ri = pack[CSizeT<0>{}]; auto li = iter<1,sizeof...(Indices)> @@ -358,12 +358,47 @@ namespace CNORXZ } template - inline decltype(auto) RArray::operator()(const DPack& pack) + inline decltype(auto) RArray::operator()(const DPack& pack) const { // TODO: assert that none of the indices is rank index return (*mB)(pack); } + template + template + OpRoot RArray::rop(const Sptr& i) + { + return (*mB)(i); + } + + template + template + inline decltype(auto) RArray::rop(const SPack& pack) + { + typedef typename std::remove_reference{}])>::type I0; + if constexpr(is_rank_index::value){ + // preliminary: + CXZ_ASSERT(mB->formatIsTrivial(), + "array has non-trivial format, rank operations require trivial format"); + /* + auto ri = pack[CSizeT<0>{}]; + auto li = iter<1,sizeof...(Indices)> + ( [&](auto i) { return pack[CSizeT{}]; }, + [](const auto&... x) { return mindexPtr( (x * ...) ); } ); + */ + return oproot(*mB, mindexPtr(pack)); + } + else { + return (*mB)(pack); + } + } + + template + inline decltype(auto) RArray::rop(const DPack& pack) + { + return (*mB)(pack); + } + template T* RArray::data() { @@ -388,7 +423,6 @@ namespace CNORXZ return *mB; } - /*============================+ | non-member functions | +============================*/ @@ -485,21 +519,25 @@ namespace CNORXZ } // Third loop: Assign map to target buffer positions: + const SizeT myrankoff = myrank*locsz; + assert(mapsize == Nranks*locsz); Vector cnt(Nranks); mi->ifor( operation ( [&](SizeT p) { const SizeT r = p / locsz; const SizeT l = p % locsz; + const SizeT mpidx = (p - myrankoff + mapsize) % mapsize; if(myrank != r and required[p]){ SizeT off = 0; for(SizeT s = 0; s != r; ++s){ off += ext[myrank][s]; } - map[p] = buf.data() + off*blocks + cnt[r]*blocks; + map[mpidx] = buf.data() + off*blocks + cnt[r]*blocks; ++cnt[r]; } if(myrank == r){ - map[p] = data.data() + l*blocks; + assert(mpidx < locsz); + map[mpidx] = data.data() + l*blocks; } } , posop(mi) ), NoF {} )(); } diff --git a/src/opt/mpi/include/rarray.h b/src/opt/mpi/include/rarray.h index 090bb83..34d57d0 100644 --- a/src/opt/mpi/include/rarray.h +++ b/src/opt/mpi/include/rarray.h @@ -218,14 +218,25 @@ namespace CNORXZ /** @copydoc ArrayBase::operator() */ template - OpRoot operator()(const Sptr& i); + COpRoot operator()(const Sptr& i) const; /** @copydoc ArrayBase::operator() */ template - inline decltype(auto) operator()(const SPack& pack); + inline decltype(auto) operator()(const SPack& pack) const; /** @copydoc ArrayBase::operator() */ - inline decltype(auto) operator()(const DPack& pack); + inline decltype(auto) operator()(const DPack& pack) const; + + /** @copydoc ArrayBase::operator() */ + template + OpRoot rop(const Sptr& i); + + /** @copydoc ArrayBase::operator() */ + template + inline decltype(auto) rop(const SPack& pack); + + /** @copydoc ArrayBase::operator() */ + inline decltype(auto) rop(const DPack& pack); /** @copydoc ArrayBase::data() */ T* data(); diff --git a/src/opt/mpi/include/rmap_xpr.cc.h b/src/opt/mpi/include/rmap_xpr.cc.h index cde7969..efead35 100644 --- a/src/opt/mpi/include/rmap_xpr.cc.h +++ b/src/opt/mpi/include/rmap_xpr.cc.h @@ -13,33 +13,59 @@ #define __cxz_mpi_rmap_xpr_cc_h__ #include "rmap_xpr.h" +#include "mpi_base.h" namespace CNORXZ { - template + template void - MapSetup,F>::setup(const Sptr& ti, - const Sptr>& si, + MapSetup,SrcIndex,F>::setup(const Sptr>& ti, + const Sptr& si, const F& f, const Sptr>& m) { auto six = *si; auto sie = si->range()->end(); auto tix = *ti; - for(six = 0; six != sie; ++six){ - tix.at( f(*six) ); - if(six.rank() == mpi::getRankNumber()){ - (*m)[six.local()->pos()] = tix.pos(); + const SizeT locsz = tix.local()->pmax().val(); + const SizeT tarsize = locsz*mpi::getNumRanks(); + const SizeT mapsize = m->size(); + const SizeT myrank = mpi::getRankNumber(); + if constexpr(mpi::is_rank_index::value){ + CXZ_ASSERT(mapsize == six.local()->pmax().val(), "map not well-formatted: size = " + << mapsize << ", expected " << six.local()->pmax().val()); + for(six = 0; six != sie; ++six){ + tix.at( f(*six) ); + if(six.rank() == myrank){ + const SizeT idx = (tix.pos() - locsz*tix.rank() + tarsize) % tarsize; + (*m)[six.local()->pos()] = idx; + } + } + } + else { + CXZ_ASSERT(mapsize == six.pmax().val(), "map not well-formatted: size = " + << mapsize << ", expected " << six.pmax().val()); + for(six = 0; six != sie; ++six){ + tix.at( f(*six) ); + const SizeT idx = (tix.pos() - locsz*tix.rank() + tarsize) % tarsize; + (*m)[six.pos()] = idx; } } } - template + template Sptr> - MapSetup,F>::setup(const Sptr& ti, - const Sptr>& si, + MapSetup,SrcIndex,F>::setup(const Sptr>& ti, + const Sptr& si, const F& f) { - auto o = std::make_shared>(si->local()->lmax().val()); + SizeT mapsize = 0; + if constexpr(mpi::is_rank_index::value){ + mapsize = si->local()->lmax().val(); + } + else { + mapsize = si->lmax().val(); + } + auto o = std::make_shared>(mapsize); setup(ti,si,f,o); return o; } diff --git a/src/opt/mpi/include/rmap_xpr.h b/src/opt/mpi/include/rmap_xpr.h index 49fd888..a3786e8 100644 --- a/src/opt/mpi/include/rmap_xpr.h +++ b/src/opt/mpi/include/rmap_xpr.h @@ -17,14 +17,15 @@ namespace CNORXZ { - template - struct MapSetup,F> + template + struct MapSetup,SrcIndex,F> { - static void setup(const Sptr& ti, const Sptr>& si, + static void setup(const Sptr>& ti, + const Sptr& si, const F& f, const Sptr>& m); - static Sptr> setup(const Sptr& ti, - const Sptr>& si, + static Sptr> setup(const Sptr>& ti, + const Sptr& si, const F& f); }; } diff --git a/src/opt/mpi/include/rop_types.cc.h b/src/opt/mpi/include/rop_types.cc.h index 915e5d0..31983c5 100644 --- a/src/opt/mpi/include/rop_types.cc.h +++ b/src/opt/mpi/include/rop_types.cc.h @@ -56,33 +56,54 @@ namespace CNORXZ template constexpr ROpRoot::ROpRoot(RArray& a, const Sptr& ri, const Sptr& li) : - mData(a.buffermap().data()), + mData(a.data()), mRIndex(ri), mIndex(li) { - CXZ_ASSERT(a.buffermap().size() == ri->lmax().val(), - "data map not properly initialized: map size = " << a.buffermap().size() - << ", rank index range size = " << ri->lmax().val()); + CXZ_ERROR("nope"); + } + + template + template + constexpr ROpRoot& ROpRoot::operator=(const Op& in) + { + OI::a(mIndex, [](auto& a, const auto& b) { a = b; }, in); + return *this; + } + + template + template + constexpr ROpRoot& ROpRoot::operator+=(const Op& in) + { + OI::a(mIndex, [](auto& a, const auto& b) { a += b; }, in); + return *this; + } + + template + constexpr ROpRoot& ROpRoot::operator=(const ROpRoot& in) + { + OI::a(mIndex, [](auto& a, const auto& b) { a = b; }, in); + return *this; } template template constexpr decltype(auto) ROpRoot::operator()(const PosT& pos) const { - return (mData[pos.val()])[pos.next().val()]; + return mData[pos.val()]; } template constexpr decltype(auto) ROpRoot::operator()() const { - return (mData[0])[0]; + return mData[0]; } template template constexpr decltype(auto) ROpRoot::rootSteps(const IndexId& id) const { - return mRIndex->stepSize(id) << mIndex->stepSize(id); + return mIndex->stepSize(id); } template diff --git a/src/opt/mpi/include/rop_types.h b/src/opt/mpi/include/rop_types.h index 896cae2..2b10bfa 100644 --- a/src/opt/mpi/include/rop_types.h +++ b/src/opt/mpi/include/rop_types.h @@ -43,6 +43,7 @@ namespace CNORXZ Sptr mIndex; }; + template constexpr decltype(auto) croproot(const RCArray& a, const Sptr& ri, const Sptr& li); @@ -57,6 +58,14 @@ namespace CNORXZ constexpr ROpRoot(RArray& a, const Sptr& ri, const Sptr& li); + template + constexpr ROpRoot& operator=(const Op& in); + + template + constexpr ROpRoot& operator+=(const Op& in); + + constexpr ROpRoot& operator=(const ROpRoot& in); + template constexpr decltype(auto) operator()(const PosT& pos) const; @@ -67,7 +76,7 @@ namespace CNORXZ private: - T** mData; + T* mData; Sptr mRIndex; Sptr mIndex; }; @@ -99,6 +108,13 @@ namespace CNORXZ }; */ } // namespace mpi + + template + struct op_size> + { + static constexpr SizeT value = 2; + }; + } // namespace CNORXZ #endif diff --git a/src/opt/mpi/tests/roperation_unit_test.cc b/src/opt/mpi/tests/roperation_unit_test.cc index 7b245bb..64482a9 100644 --- a/src/opt/mpi/tests/roperation_unit_test.cc +++ b/src/opt/mpi/tests/roperation_unit_test.cc @@ -51,15 +51,20 @@ namespace RangePtr scr = mSpRange*mSpRange; const Vector vec = Numbers::get(0,mRXRange->sub(1)->size()+2); RangePtr ltr = mRXRange->sub(1)->sub(0); - RangePtr llr = mRXRange->sub(1)->sub(1); - mMRange = ltr*llr*llr*llr*scr; + RangePtr ll1r = mRXRange->sub(1)->sub(1); + RangePtr ll2r = mRXRange->sub(1)->sub(2); + RangePtr ll3r = mRXRange->sub(1)->sub(3); + mMRange = ltr*ll1r*ll2r*ll3r*scr; Vector data(mMRange->size()); Vector data2(mMRange->size()); for(SizeT i = 0; i != mRXRange->sub(1)->size(); ++i){ for(SizeT j = 0; j != scr->size(); ++j){ const SizeT k = i*scr->size() + j; - data[k] = vec[i] + static_cast(j-scr->size()) / static_cast((myrank+1)*(T*L*L*L)); - data2[k] = vec[i] + static_cast((j*2-scr->size())*i) / static_cast((myrank+1)*50); + data[k] = vec[i] * static_cast(j+2); + data2[k] = vec[i] / static_cast(j+2); + if(k > 0){ + assert(data[k] != data[k-1]); + } } } mM1 = RCArray( MArray(mMRange, data), mGeom ); @@ -68,12 +73,15 @@ namespace mAll2 = Vector(data2.size() * getNumRanks()); typedef RIndex,MIndex> RI; const SizeT scrs = scr->size(); + auto rix = RI(mRXRange); + assert(rix.lmax().val() == 27648); + assert(scrs == 16); for(auto ri = RI(mRXRange); ri.lex() != ri.lmax().val(); ++ri){ Double* buf = mAll1.data() + scrs*ri.lex(); Double* buf2 = mAll2.data() + scrs*ri.lex(); if(ri.rank() == myrank){ - std::memcpy(buf, data.data()+ri.local()->lex()*scrs, scrs); - std::memcpy(buf2, data2.data()+ri.local()->lex()*scrs, scrs); + std::memcpy(buf, data.data()+ri.local()->lex()*scrs, scrs*sizeof(Double)); + std::memcpy(buf2, data2.data()+ri.local()->lex()*scrs, scrs*sizeof(Double)); } MPI_Bcast(buf, scrs, MPI_DOUBLE, ri.rank(), MPI_COMM_WORLD); MPI_Bcast(buf2, scrs, MPI_DOUBLE, ri.rank(), MPI_COMM_WORLD); @@ -102,6 +110,52 @@ namespace EXPECT_EQ(mM1.size(), mM2.size()); } + TEST_F(ROp_Test, Difference) + { + RArray res( MArray(mM1.range()->sub(1)), mGeom ); + Vector comp( mXRange->size()*mSpRange->size()*mSpRange->size() ); + EXPECT_EQ(res.size(), comp.size()); + + typedef UIndex UI; + + auto xp = std::make_shared,MIndex>>(mRXRange); + auto xm = std::make_shared,MIndex>>(mRXRange); + auto x = std::make_shared,MIndex>>(mRXRange); + auto A = std::make_shared>(mSpRange); + auto B = std::make_shared>(mSpRange); + auto AB = mindexPtr(A*B); + + Sptr> imap1; + imap1 = setupMap(xp, x, [&](const auto& vec) { + return std::make_tuple((std::get<0>(vec)+1)%T, (std::get<1>(vec)+1)%L, + (std::get<2>(vec)+1)%L, (std::get<3>(vec)+1)%L); } ); + mM1.load(x, xp, AB, imap1); + res.rop(x*A*B) = mapXpr(xp,x,imap1, mM1(xp*A*B) - mM1(x*A*B) ); + + for(SizeT x0 = 0; x0 != T; ++x0) { + for(SizeT x1 = 0; x1 != L; ++x1) + for(SizeT x2 = 0; x2 != L; ++x2) + for(SizeT x3 = 0; x3 != L; ++x3) + for(SizeT A = 0; A != 4; ++A) + for(SizeT B = 0; B != 4; ++B) { + const SizeT xi = x0*L*L*L + x1*L*L + x2*L + x3; + const SizeT x0p = (x0+1)%T; + const SizeT x1p = (x1+1)%L; + const SizeT x2p = (x2+1)%L; + const SizeT x3p = (x3+1)%L; + const SizeT xpi = x0p*L*L*L + x1p*L*L + x2p*L + x3p; + const SizeT pi = xpi*4*4 + A*4 + B; + const SizeT ri = xi*4*4 + A*4 + B; + comp[ri] = mAll1[pi] - mAll1[ri]; + }} + + for(auto i = res.begin(); i.lex() != i.lmax().val(); ++i){ + const auto a1 = *i; + const auto a2 = comp[i.lex()]; + EXPECT_EQ(a1, a2); + } + } + TEST_F(ROp_Test, Contract) { Vector comp(mRXRange->size()); @@ -141,7 +195,6 @@ namespace mM1.load(,A*B*a*b); mM2.load(,A*B*a*b); res(y) += (mM1(x*A*B*a*b) * mM2(xy*B*A*b*a)).c(x*A*B*a*b); - */ // comparison loop for(SizeT x0 = 0; x0 != T; ++x0) { VCHECK(x0); for(SizeT x1 = 0; x1 != L; ++x1) @@ -165,6 +218,7 @@ namespace comp[yi] += mAll1[i1] * mAll2[i2]; }} VCHECK(comp[123]); + */ /* for(auto i = res.begin(); i.lex() != i.lmax().val(); ++i){ EXPECT_EQ(*i, comp[i.lex()]); diff --git a/src/opt/mpi/tests/setbuf_unit_test.cc b/src/opt/mpi/tests/setbuf_unit_test.cc index b8fc327..f9dfcb7 100644 --- a/src/opt/mpi/tests/setbuf_unit_test.cc +++ b/src/opt/mpi/tests/setbuf_unit_test.cc @@ -114,6 +114,9 @@ namespace setupBuffer(rgj, rgi, fmap, data, buf, map, mSRange->size()); EXPECT_EQ(mRRange->sub(1)->size(), 16*12*12*12/4); + const SizeT locsz = rgj->local()->lmax().val(); + const SizeT myrankoff = myrank*locsz; + const SizeT mapsize = map.size(); // Fourth loop: Check: for(*rgi = 0, gi = 0; rgi->lex() != rgi->lmax().val(); ++*rgi, ++gi){ gj = gi.lex(); @@ -124,13 +127,17 @@ namespace *rgj = gj.lex(); if(rgi->rank() == myrank){ - EXPECT_TRUE(map.data()[rgj->pos()] != nullptr); - - const Double vn = *map[rgj->pos()]/blocks; + const SizeT mpidx = (rgj->pos() - myrankoff + mapsize) % mapsize; + VCHECK(mpidx); + assert(mpidx < map.size()); + EXPECT_TRUE(map.data()[mpidx] != nullptr); + if(map.data()[mpidx] == nullptr) continue; + + const Double vn = *map[mpidx]/blocks; const SizeT xp = static_cast(vn); const SizeT orank = xp / mRRange->sub(1)->size(); if(myrank == 0){ - std::cout << " pos = " << rgj->pos() << " , val = " << *map[rgj->pos()] + std::cout << " pos = " << rgj->pos() << " , val = " << *map[mpidx] << " , val_norm = " << vn << " , origin rank = " << orank << std::endl; }