further auto vectorization (open issue: static loop reduction)
This commit is contained in:
parent
64d10867dd
commit
15664781f7
3 changed files with 100 additions and 41 deletions
|
@ -346,6 +346,7 @@ namespace MultiArrayTools
|
||||||
template <typename V, class ET>
|
template <typename V, class ET>
|
||||||
inline const V& ConstOperationRoot<T,Ranges...>::vget(ET pos) const
|
inline const V& ConstOperationRoot<T,Ranges...>::vget(ET pos) const
|
||||||
{
|
{
|
||||||
|
VCHECK(pos.val());
|
||||||
return *(reinterpret_cast<const V*>(mDataPtr)+pos.val());
|
return *(reinterpret_cast<const V*>(mDataPtr)+pos.val());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -595,29 +596,30 @@ namespace MultiArrayTools
|
||||||
template <bool VABLE = false>
|
template <bool VABLE = false>
|
||||||
struct VExec
|
struct VExec
|
||||||
{
|
{
|
||||||
template <typename TarOp, class OpClass>
|
template <template <typename> class F, typename TarOp, class OpClass>
|
||||||
static inline void exec(TarOp& th, const OpClass& in)
|
static inline void exec(TarOp& th, const OpClass& in)
|
||||||
{
|
{
|
||||||
th.assign(in)();
|
typedef typename TarOp::value_type T;
|
||||||
|
th.template asx<IAccess<F<T>>>(in)();
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct VExec<true>
|
struct VExec<true>
|
||||||
{
|
{
|
||||||
template <typename TarOp, class OpClass>
|
template <template <typename> class F, typename TarOp, class OpClass>
|
||||||
static inline void exec(TarOp& th, const OpClass& in)
|
static inline void exec(TarOp& th, const OpClass& in)
|
||||||
{
|
{
|
||||||
CHECK;
|
CHECK;
|
||||||
typedef typename TarOp::value_type T;
|
typedef typename TarOp::value_type T;
|
||||||
auto x = th.template asx<IVAssign<typename VType<T>::type,T>>(in);
|
auto x = th.template asx<IVAccess<typename VType<T>::type,F<T>>>(in);
|
||||||
const size_t inum = x.vec(VType<T>::MULT);
|
const size_t inum = x.vec(VType<T>::MULT);
|
||||||
if(x.rootSteps(inum) == 1){
|
if(x.rootSteps(inum) == 1){
|
||||||
CHECK;
|
CHECK;
|
||||||
x();
|
x();
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
th.assign(in)();
|
th.template asx<IAccess<F<T>>>(in)();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
@ -626,7 +628,7 @@ namespace MultiArrayTools
|
||||||
template <class OpClass>
|
template <class OpClass>
|
||||||
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator=(const OpClass& in)
|
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator=(const OpClass& in)
|
||||||
{
|
{
|
||||||
VExec<OpClass::VABLE>::exec(*this,in);
|
VExec<OpClass::VABLE>::template exec<identity>(*this,in);
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -634,7 +636,8 @@ namespace MultiArrayTools
|
||||||
template <class OpClass>
|
template <class OpClass>
|
||||||
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator+=(const OpClass& in)
|
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::operator+=(const OpClass& in)
|
||||||
{
|
{
|
||||||
plus(in)();
|
VExec<OpClass::VABLE>::template exec<xxxplus>(*this,in);
|
||||||
|
//plus(in)();
|
||||||
return *this;
|
return *this;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -661,6 +664,7 @@ namespace MultiArrayTools
|
||||||
template <typename V, class ET>
|
template <typename V, class ET>
|
||||||
inline V& OperationRoot<T,Ranges...>::vget(ET pos) const
|
inline V& OperationRoot<T,Ranges...>::vget(ET pos) const
|
||||||
{
|
{
|
||||||
|
VCHECK(pos.val());
|
||||||
return *(reinterpret_cast<V*>(mDataPtr)+pos.val());
|
return *(reinterpret_cast<V*>(mDataPtr)+pos.val());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -732,46 +736,77 @@ namespace MultiArrayTools
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class... Ranges>
|
template <typename T, class... Ranges>
|
||||||
template <class OpClass>
|
template <class IOp, class OpClass>
|
||||||
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in)
|
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in) const
|
||||||
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
||||||
(mOrigDataPtr,*this,in))))
|
(mOrigDataPtr,*this,in))))
|
||||||
|
|
||||||
{
|
{
|
||||||
return mIndex.pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
||||||
(mOrigDataPtr,*this,in)));
|
return mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
||||||
|
(mOrigDataPtr,*this,in)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class... Ranges>
|
template <typename T, class... Ranges>
|
||||||
template <class OpClass, class Index>
|
template <class IOp, class OpClass>
|
||||||
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in, const std::shared_ptr<Index>& i) const
|
auto ParallelOperationRoot<T,Ranges...>::asxExpr(const OpClass& in) const
|
||||||
-> decltype(i->pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass>
|
-> decltype(in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||||
(mOrigDataPtr,*this,in))))
|
(mOrigDataPtr,*this,in)))
|
||||||
{
|
{
|
||||||
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
||||||
return i->pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass>
|
return in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||||
(mOrigDataPtr,*this,in)));
|
(mOrigDataPtr,*this,in));
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class... Ranges>
|
||||||
|
template <class IOp, class OpClass, class Index>
|
||||||
|
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||||
|
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||||
|
(mOrigDataPtr,*this,in))))
|
||||||
|
{
|
||||||
|
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
||||||
|
return i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||||
|
(mOrigDataPtr,*this,in)));
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class... Ranges>
|
template <typename T, class... Ranges>
|
||||||
template <class OpClass>
|
template <class OpClass>
|
||||||
auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in)
|
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in) const
|
||||||
-> decltype(mIndex.pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
-> decltype(this->template asx<IAssign<T>>(in))
|
||||||
(mOrigDataPtr,*this,in))))
|
|
||||||
{
|
{
|
||||||
return mIndex.pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
return this->template asx<IAssign<T>>(in);
|
||||||
(mOrigDataPtr,*this,in)));
|
}
|
||||||
|
|
||||||
|
template <typename T, class... Ranges>
|
||||||
|
template <class OpClass>
|
||||||
|
auto ParallelOperationRoot<T,Ranges...>::assignExpr(const OpClass& in) const
|
||||||
|
-> decltype(this->template asxExpr<IAssign<T>>(in))
|
||||||
|
{
|
||||||
|
return this->template asxExpr<IAssign<T>>(in);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class... Ranges>
|
||||||
|
template <class OpClass, class Index>
|
||||||
|
auto ParallelOperationRoot<T,Ranges...>::assign(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||||
|
-> decltype(this->template asx<IAssign<T>>(in,i))
|
||||||
|
{
|
||||||
|
return this->template asx<IAssign<T>>(in,i);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, class... Ranges>
|
||||||
|
template <class OpClass>
|
||||||
|
auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in) const
|
||||||
|
-> decltype(this->template asx<IPlus<T>>(in))
|
||||||
|
{
|
||||||
|
return this->template asx<IPlus<T>>(in);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class... Ranges>
|
template <typename T, class... Ranges>
|
||||||
template <class OpClass, class Index>
|
template <class OpClass, class Index>
|
||||||
auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in, const std::shared_ptr<Index>& i) const
|
auto ParallelOperationRoot<T,Ranges...>::plus(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||||
-> decltype(i->pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass>
|
-> decltype(this->template asx<IPlus<T>>(in,i))
|
||||||
(mOrigDataPtr,*this,in))))
|
|
||||||
{
|
{
|
||||||
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
return this->template asx<IPlus<T>>(in,i);
|
||||||
return i->pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass>
|
|
||||||
(mOrigDataPtr,*this,in)));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class... Ranges>
|
template <typename T, class... Ranges>
|
||||||
|
|
|
@ -251,10 +251,14 @@ namespace MultiArrayTools
|
||||||
template <typename T, typename Op, class ExtType>
|
template <typename T, typename Op, class ExtType>
|
||||||
static inline void f(T*& t, size_t pos, const Op& op, ExtType e)
|
static inline void f(T*& t, size_t pos, const Op& op, ExtType e)
|
||||||
{
|
{
|
||||||
|
VCHECK(pos);
|
||||||
VFunc<F>::selfApply(reinterpret_cast<V*>(t)[pos],op.template vget<V>(e));
|
VFunc<F>::selfApply(reinterpret_cast<V*>(t)[pos],op.template vget<V>(e));
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
|
template <typename T>
|
||||||
|
using xxxplus = plus<T>;
|
||||||
|
|
||||||
template <typename T>
|
template <typename T>
|
||||||
using IAssign = IAccess<identity<T>>;
|
using IAssign = IAccess<identity<T>>;
|
||||||
|
|
||||||
|
@ -684,25 +688,39 @@ namespace MultiArrayTools
|
||||||
|
|
||||||
ParallelOperationRoot(T* data, const IndexType& ind);
|
ParallelOperationRoot(T* data, const IndexType& ind);
|
||||||
|
|
||||||
template <class OpClass>
|
template <class IOp, class OpClass>
|
||||||
auto assign(const OpClass& in)
|
auto asx(const OpClass& in) const
|
||||||
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
||||||
(mOrigDataPtr,*this,in))));
|
(mOrigDataPtr,*this,in))));
|
||||||
|
|
||||||
template <class OpClass, class Index>
|
template <class IOp, class OpClass>
|
||||||
auto assign(const OpClass& in, const std::shared_ptr<Index>& i) const
|
auto asxExpr(const OpClass& in) const
|
||||||
-> decltype(i->pifor(1,in.loop(AssignmentExpr2<T,ParallelOperationRoot<T,Ranges...>,OpClass>
|
-> decltype(in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>(mOrigDataPtr,*this,in)));
|
||||||
|
|
||||||
|
template <class IOp, class OpClass, class Index>
|
||||||
|
auto asx(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||||
|
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||||
(mOrigDataPtr,*this,in))));
|
(mOrigDataPtr,*this,in))));
|
||||||
|
|
||||||
|
template <class OpClass>
|
||||||
|
auto assign(const OpClass& in) const
|
||||||
|
-> decltype(this->template asx<IAssign<T>>(in));
|
||||||
|
|
||||||
template <class OpClass>
|
template <class OpClass>
|
||||||
auto plus(const OpClass& in)
|
auto assignExpr(const OpClass& in) const
|
||||||
-> decltype(mIndex.pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
-> decltype(this->template asxExpr<IAssign<T>>(in));
|
||||||
(mOrigDataPtr,*this,in))));
|
|
||||||
|
template <class OpClass, class Index>
|
||||||
|
auto assign(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||||
|
-> decltype(this->template asx<IAssign<T>>(in,i));
|
||||||
|
|
||||||
|
template <class OpClass>
|
||||||
|
auto plus(const OpClass& in) const
|
||||||
|
-> decltype(this->template asx<IPlus<T>>(in));
|
||||||
|
|
||||||
template <class OpClass, class Index>
|
template <class OpClass, class Index>
|
||||||
auto plus(const OpClass& in, const std::shared_ptr<Index>& i) const
|
auto plus(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||||
-> decltype(i->pifor(1,in.loop(AddExpr<T,ParallelOperationRoot<T,Ranges...>,OpClass>
|
-> decltype(this->template asx<IPlus<T>>(in,i));
|
||||||
(mOrigDataPtr,*this,in))));
|
|
||||||
|
|
||||||
template <class OpClass>
|
template <class OpClass>
|
||||||
ParallelOperationRoot& operator=(const OpClass& in);
|
ParallelOperationRoot& operator=(const OpClass& in);
|
||||||
|
|
|
@ -396,7 +396,9 @@ namespace MultiArrayHelper
|
||||||
virtual std::intptr_t vec(size_t vs) override final
|
virtual std::intptr_t vec(size_t vs) override final
|
||||||
{
|
{
|
||||||
if(mStep == 1 and mMax % vs == 0){
|
if(mStep == 1 and mMax % vs == 0){
|
||||||
|
VCHECK(vs);
|
||||||
mMax /= vs;
|
mMax /= vs;
|
||||||
|
VCHECK(mMax);
|
||||||
return reinterpret_cast<std::intptr_t>(mIndPtr);
|
return reinterpret_cast<std::intptr_t>(mIndPtr);
|
||||||
}
|
}
|
||||||
return mExpr.vec(vs);
|
return mExpr.vec(vs);
|
||||||
|
@ -450,10 +452,14 @@ namespace MultiArrayHelper
|
||||||
PFor(const IndexClass* indPtr,
|
PFor(const IndexClass* indPtr,
|
||||||
size_t step, Expr expr);
|
size_t step, Expr expr);
|
||||||
|
|
||||||
virtual std::intptr_t vec(size_t vs) override final
|
template <size_t VS>
|
||||||
|
auto vec() const
|
||||||
{
|
{
|
||||||
|
// statically distinguish!!!
|
||||||
if(mStep == 1 and mMax % vs == 0){
|
if(mStep == 1 and mMax % vs == 0){
|
||||||
|
VCHECK(vs);
|
||||||
mMax /= vs;
|
mMax /= vs;
|
||||||
|
VCHECK(mMax);
|
||||||
return reinterpret_cast<std::intptr_t>(mIndPtr);
|
return reinterpret_cast<std::intptr_t>(mIndPtr);
|
||||||
}
|
}
|
||||||
return mExpr.vec(vs);
|
return mExpr.vec(vs);
|
||||||
|
|
Loading…
Reference in a new issue