further auto vectorization (open issue: static loop reduction)

This commit is contained in:
Christian Zimmermann 2021-01-21 00:35:13 +01:00
parent 64d10867dd
commit 15664781f7
3 changed files with 100 additions and 41 deletions

View file

@ -346,6 +346,7 @@ namespace MultiArrayTools
template <typename V, class ET> template <typename V, class ET>
inline const V& ConstOperationRoot<T,Ranges...>::vget(ET pos) const inline const V& ConstOperationRoot<T,Ranges...>::vget(ET pos) const
{ {
VCHECK(pos.val());
return *(reinterpret_cast<const V*>(mDataPtr)+pos.val()); return *(reinterpret_cast<const V*>(mDataPtr)+pos.val());
} }
@ -595,29 +596,30 @@ namespace MultiArrayTools
template <bool VABLE = false> template <bool VABLE = false>
struct VExec struct VExec
{ {
template <typename TarOp, class OpClass> template <template <typename> class F, typename TarOp, class OpClass>
static inline void exec(TarOp& th, const OpClass& in) static inline void exec(TarOp& th, const OpClass& in)
{ {
th.assign(in)(); typedef typename TarOp::value_type T;
th.template asx<IAccess<F<T>>>(in)();
} }
}; };
template <> template <>
struct VExec<true> struct VExec<true>
{ {
template <typename TarOp, class OpClass> template <template <typename> class F, typename TarOp, class OpClass>
static inline void exec(TarOp& th, const OpClass& in) static inline void exec(TarOp& th, const OpClass& in)
{ {
CHECK; CHECK;
typedef typename TarOp::value_type T; typedef typename TarOp::value_type T;
auto x = th.template asx<IVAssign<typename VType<T>::type,T>>(in); auto x = th.template asx<IVAccess<typename VType<T>::type,F<T>>>(in);
const size_t inum = x.vec(VType<T>::MULT); const size_t inum = x.vec(VType<T>::MULT);
if(x.rootSteps(inum) == 1){ if(x.rootSteps(inum) == 1){
CHECK; CHECK;
x(); x();
} }
else { else {
th.assign(in)(); th.template asx<IAccess<F<T>>>(in)();
} }
} }
}; };
@ -626,7 +628,7 @@ namespace MultiArrayTools
template <class OpClass> template <class OpClass>
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator=(const OpClass& in) OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator=(const OpClass& in)
{ {
VExec<OpClass::VABLE>::exec(*this,in); VExec<OpClass::VABLE>::template exec<identity>(*this,in);
return *this; return *this;
} }
@ -634,7 +636,8 @@ namespace MultiArrayTools
template <class OpClass> template <class OpClass>
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator+=(const OpClass& in) OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator+=(const OpClass& in)
{ {
plus(in)(); VExec<OpClass::VABLE>::template exec<xxxplus>(*this,in);
//plus(in)();
return *this; return *this;
} }
@ -661,6 +664,7 @@ namespace MultiArrayTools
template <typename V, class ET> template <typename V, class ET>
inline V& OperationRoot<T,Ranges...>::vget(ET pos) const inline V& OperationRoot<T,Ranges...>::vget(ET pos) const
{ {
VCHECK(pos.val());
return *(reinterpret_cast<V*>(mDataPtr)+pos.val()); return *(reinterpret_cast<V*>(mDataPtr)+pos.val());
} }
@ -732,46 +736,77 @@ namespace MultiArrayTools
} }
template <typename T, class... Ranges> template <typename T, class... Ranges>
template <class OpClass> template <class IOp, class OpClass>
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in) auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in) const
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET> -> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
(mOrigDataPtr,*this,in)))) (mOrigDataPtr,*this,in))))
{ {
return mIndex.pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET> static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
return mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
(mOrigDataPtr,*this,in))); (mOrigDataPtr,*this,in)));
} }
// Builds the loop expression that applies the access policy IOp to this
// operation root, with `in` as the source operand.
//
// Unlike asx(), the result is NOT wrapped in a pifor() call; the bare
// in.loop(...) expression is returned to the caller.
//
// Compile-time check: OpClass::SIZE must equal the SIZE of the type returned
// by in.rootSteps(), otherwise the build fails with "Ext Size mismatch".
template <typename T, class... Ranges>
template <class IOp, class OpClass>
auto ParallelOperationRoot<T,Ranges...>::asxExpr(const OpClass& in) const
    -> decltype(in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
                        (mOrigDataPtr,*this,in)))
{
    // Shorthands for the two dependent types used below.
    typedef AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass> AExpr;
    typedef decltype(in.rootSteps()) RootExt;

    static_assert( OpClass::SIZE == RootExt::SIZE, "Ext Size mismatch" );

    return in.loop(AExpr(mOrigDataPtr,*this,in));
}
// Builds the loop expression that applies the access policy IOp to this
// operation root (source operand `in`) and wraps it in i->pifor(1, ...),
// i.e. the outer loop is taken over the caller-supplied index `i` rather
// than over the member index.
//
// Compile-time check: OpClass::SIZE must equal the SIZE of the type returned
// by in.rootSteps(), otherwise the build fails with "Ext Size mismatch".
template <typename T, class... Ranges>
template <class IOp, class OpClass, class Index>
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in, const std::shared_ptr<Index>& i) const
    -> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
                                   (mOrigDataPtr,*this,in))))
{
    // Shorthands for the two dependent types used below.
    typedef AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass> AExpr;
    typedef decltype(in.rootSteps()) RootExt;

    static_assert( OpClass::SIZE == RootExt::SIZE, "Ext Size mismatch" );

    // Kept as one nested expression so pifor() still receives a temporary.
    return i->pifor(1,in.loop(AExpr(mOrigDataPtr,*this,in)));
}
// Convenience wrapper: forwards to asx() with the access policy fixed to
// IAssign<T>, using the single-argument asx() overload.
template <typename T, class... Ranges>
template <class OpClass>
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in) const
    -> decltype(this->template asx<IAssign<T>>(in))
{
    typedef IAssign<T> Access; // access policy selected by this wrapper
    return this->template asx<Access>(in);
}
// Convenience wrapper: forwards to asxExpr() with the access policy fixed
// to IAssign<T>; the result is whatever asxExpr() returns for that policy.
template <typename T, class... Ranges>
template <class OpClass>
auto ParallelOperationRoot<T,Ranges...>::assignExpr(const OpClass& in) const
    -> decltype(this->template asxExpr<IAssign<T>>(in))
{
    typedef IAssign<T> Access; // access policy selected by this wrapper
    return this->template asxExpr<Access>(in);
}
template <typename T, class... Ranges> template <typename T, class... Ranges>
template <class OpClass, class Index> template <class OpClass, class Index>
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in, const std::shared_ptr<Index>& i) const auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass> -> decltype(this->template asx<IAssign<T>>(in,i))
(mOrigDataPtr,*this,in))))
{ {
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" ); return this->template asx<IAssign<T>>(in,i);
return i->pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass>
(mOrigDataPtr,*this,in)));
} }
template <typename T, class... Ranges> template <typename T, class... Ranges>
template <class OpClass> template <class OpClass>
auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in) auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in) const
-> decltype(mIndex.pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET> -> decltype(this->template asx<IPlus<T>>(in))
(mOrigDataPtr,*this,in))))
{ {
return mIndex.pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET> return this->template asx<IPlus<T>>(in);
(mOrigDataPtr,*this,in)));
} }
template <typename T, class... Ranges> template <typename T, class... Ranges>
template <class OpClass, class Index> template <class OpClass, class Index>
auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in, const std::shared_ptr<Index>& i) const auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass> -> decltype(this->template asx<IPlus<T>>(in,i))
(mOrigDataPtr,*this,in))))
{ {
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" ); return this->template asx<IPlus<T>>(in,i);
return i->pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass>
(mOrigDataPtr,*this,in)));
} }
template <typename T, class... Ranges> template <typename T, class... Ranges>

View file

@ -251,10 +251,14 @@ namespace MultiArrayTools
template <typename T, typename Op, class ExtType> template <typename T, typename Op, class ExtType>
static inline void f(T*& t, size_t pos, const Op& op, ExtType e) static inline void f(T*& t, size_t pos, const Op& op, ExtType e)
{ {
VCHECK(pos);
VFunc<F>::selfApply(reinterpret_cast<V*>(t)[pos],op.template vget<V>(e)); VFunc<F>::selfApply(reinterpret_cast<V*>(t)[pos],op.template vget<V>(e));
} }
}; };
// Alias template that simply names plus<T>. It is passed as the
// template-template argument of VExec<...>::exec in OperationRoot::operator+=.
// NOTE(review): presumably introduced so the functor can be named where
// `plus` would otherwise resolve to the member function plus() -- confirm,
// and consider a more descriptive name than "xxxplus".
template <typename T>
using xxxplus = plus<T>;
template <typename T> template <typename T>
using IAssign = IAccess<identity<T>>; using IAssign = IAccess<identity<T>>;
@ -684,25 +688,39 @@ namespace MultiArrayTools
ParallelOperationRoot(T* data, const IndexType& ind); ParallelOperationRoot(T* data, const IndexType& ind);
template <class OpClass> template <class IOp, class OpClass>
auto assign(const OpClass& in) auto asx(const OpClass& in) const
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET> -> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
(mOrigDataPtr,*this,in)))); (mOrigDataPtr,*this,in))));
// Returns the loop expression in.loop(AssignmentExpr<T,IOp,...>(...)) built
// from mOrigDataPtr, *this and `in`, without a surrounding pifor() call.
// IOp is the access policy applied by the expression (e.g. IAssign<T>).
template <class IOp, class OpClass>
auto asxExpr(const OpClass& in) const
-> decltype(in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>(mOrigDataPtr,*this,in)));
// Overload of asx() taking an explicit index: returns the loop expression
// for access policy IOp wrapped in i->pifor(1, ...), i.e. the outer loop is
// over the supplied index `i` instead of the member index.
template <class IOp, class OpClass, class Index>
auto asx(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
(mOrigDataPtr,*this,in))));
// Shorthand for asx<IAssign<T>>(in); the return type is exactly that of
// the single-argument asx() instantiated with the IAssign<T> policy.
template <class OpClass>
auto assign(const OpClass& in) const
-> decltype(this->template asx<IAssign<T>>(in));
// Shorthand for asxExpr<IAssign<T>>(in); the return type is exactly that of
// asxExpr() instantiated with the IAssign<T> policy.
template <class OpClass>
auto assignExpr(const OpClass& in) const
-> decltype(this->template asxExpr<IAssign<T>>(in));
template <class OpClass, class Index> template <class OpClass, class Index>
auto assign(const OpClass& in, const std::shared_ptr<Index>& i) const auto assign(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass> -> decltype(this->template asx<IAssign<T>>(in,i));
(mOrigDataPtr,*this,in))));
template <class OpClass> template <class OpClass>
auto plus(const OpClass& in) auto plus(const OpClass& in) const
-> decltype(mIndex.pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET> -> decltype(this->template asx<IPlus<T>>(in));
(mOrigDataPtr,*this,in))));
template <class OpClass, class Index> template <class OpClass, class Index>
auto plus(const OpClass& in, const std::shared_ptr<Index>& i) const auto plus(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass> -> decltype(this->template asx<IPlus<T>>(in,i));
(mOrigDataPtr,*this,in))));
template <class OpClass> template <class OpClass>
ParallelOperationRoot& operator=(const OpClass& in); ParallelOperationRoot& operator=(const OpClass& in);

View file

@ -396,7 +396,9 @@ namespace MultiArrayHelper
virtual std::intptr_t vec(size_t vs) override final virtual std::intptr_t vec(size_t vs) override final
{ {
if(mStep == 1 and mMax % vs == 0){ if(mStep == 1 and mMax % vs == 0){
VCHECK(vs);
mMax /= vs; mMax /= vs;
VCHECK(mMax);
return reinterpret_cast<std::intptr_t>(mIndPtr); return reinterpret_cast<std::intptr_t>(mIndPtr);
} }
return mExpr.vec(vs); return mExpr.vec(vs);
@ -450,10 +452,14 @@ namespace MultiArrayHelper
PFor(const IndexClass* indPtr, PFor(const IndexClass* indPtr,
size_t step, Expr expr); size_t step, Expr expr);
virtual std::intptr_t vec(size_t vs) override final template <size_t VS>
auto vec() const
{ {
// statically distinguish!!!
if(mStep == 1 and mMax % vs == 0){ if(mStep == 1 and mMax % vs == 0){
VCHECK(vs);
mMax /= vs; mMax /= vs;
VCHECK(mMax);
return reinterpret_cast<std::intptr_t>(mIndPtr); return reinterpret_cast<std::intptr_t>(mIndPtr);
} }
return mExpr.vec(vs); return mExpr.vec(vs);