auto-vectorization works

This commit is contained in:
Christian Zimmermann 2021-01-24 02:10:06 +01:00
parent 269ff69ec3
commit 867c20959a
5 changed files with 72 additions and 28 deletions

View file

@ -57,6 +57,7 @@ namespace MultiArrayTools
//typedef SingleIndex<typename Op::value_type,STYPE> OIType;
static constexpr size_t LAYER = Expr::LAYER + 1;
static constexpr size_t SIZE = Expr::SIZE + Op::SIZE;
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
private:
OpExpr() = default;

View file

@ -346,7 +346,7 @@ namespace MultiArrayTools
template <typename V, class ET>
inline const V& ConstOperationRoot<T,Ranges...>::vget(ET pos) const
{
VCHECK(pos.val());
//VCHECK(pos.val());
return *(reinterpret_cast<const V*>(mDataPtr+pos.val()));
}
@ -665,7 +665,7 @@ namespace MultiArrayTools
template <typename V, class ET>
inline V& OperationRoot<T,Ranges...>::vget(ET pos) const
{
VCHECK(pos.val());
//VCHECK(pos.val());
return *(reinterpret_cast<V*>(mDataPtr+pos.val()));
}
@ -740,12 +740,12 @@ namespace MultiArrayTools
template <class IOp, class OpClass>
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in) const
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
(mOrigDataPtr,*this,in))))
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>())
{
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
return mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
(mOrigDataPtr,*this,in)));
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>();
}
template <typename T, class... Ranges>
@ -763,11 +763,11 @@ namespace MultiArrayTools
template <class IOp, class OpClass, class Index>
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
(mOrigDataPtr,*this,in))))
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>())
{
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
return i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
(mOrigDataPtr,*this,in)));
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>();
}
template <typename T, class... Ranges>
@ -814,7 +814,7 @@ namespace MultiArrayTools
template <class OpClass>
ParallelOperationRoot<T,Ranges...>& ParallelOperationRoot<T,Ranges...>::operator=(const OpClass& in)
{
assign(in)();
VExec<OpClass::VABLE>::template exec<identity>(*this,in);
return *this;
}
@ -822,7 +822,7 @@ namespace MultiArrayTools
template <class OpClass>
ParallelOperationRoot<T,Ranges...>& ParallelOperationRoot<T,Ranges...>::operator+=(const OpClass& in)
{
plus(in)();
VExec<OpClass::VABLE>::template exec<xxxplus>(*this,in);
return *this;
}

View file

@ -259,7 +259,7 @@ namespace MultiArrayTools
template <typename Op, class ExtType>
static inline void f(T*& t, size_t pos, const Op& op, ExtType e)
{
VCHECK(pos);
//VCHECK(pos);
VFunc<F>::selfApply(*reinterpret_cast<value_type*>(t+pos),op.template vget<value_type>(e));
}
};
@ -332,6 +332,7 @@ namespace MultiArrayTools
public:
static constexpr size_t LAYER = 0;
static constexpr size_t NHLAYER = 0;
static constexpr size_t SIZE = Target::SIZE + OpClass::SIZE;
typedef decltype(mTar.rootSteps(0).extend( mSec.rootSteps(0) )) ExtType;
@ -371,7 +372,8 @@ namespace MultiArrayTools
public:
static constexpr size_t LAYER = 0;
static constexpr size_t SIZE = RootSum<Ops...>::SIZE;
static constexpr size_t NHLAYER = 0;
static constexpr size_t SIZE = RootSum<Ops...>::SIZE;
typedef decltype(RootSumN<sizeof...(Ops)-1>::rootSteps(mOps,0) ) ExtType;
MOp(const Ops&... exprs);
@ -410,6 +412,7 @@ namespace MultiArrayTools
public:
static constexpr size_t LAYER = 0;
static constexpr size_t NHLAYER = 0;
static constexpr size_t SIZE = OpClass::SIZE + NextExpr::SIZE;
typedef decltype(mSec.rootSteps(0).extend( mNExpr.rootSteps(0) ) ) ExtType;
@ -699,7 +702,7 @@ namespace MultiArrayTools
template <class IOp, class OpClass>
auto asx(const OpClass& in) const
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
(mOrigDataPtr,*this,in))));
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>());
template <class IOp, class OpClass>
auto asxExpr(const OpClass& in) const
@ -708,7 +711,7 @@ namespace MultiArrayTools
template <class IOp, class OpClass, class Index>
auto asx(const OpClass& in, const std::shared_ptr<Index>& i) const
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
(mOrigDataPtr,*this,in))));
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>());
template <class OpClass>
auto assign(const OpClass& in) const

View file

@ -271,6 +271,7 @@ namespace MultiArrayHelper
static constexpr size_t LAYER = Expr::LAYER + 1;
static constexpr size_t SIZE = Expr::SIZE;
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
SingleExpression(const SingleExpression& in) = default;
SingleExpression& operator=(const SingleExpression& in) = default;
@ -326,6 +327,7 @@ namespace MultiArrayHelper
static constexpr size_t LAYER = Expr::LAYER + 1;
static constexpr size_t SIZE = Expr::SIZE + 1;
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
SubExpr(const SubExpr& in) = default;
SubExpr& operator=(const SubExpr& in) = default;
@ -410,6 +412,36 @@ namespace MultiArrayHelper
}
};
// Compile-time helper keyed on a ForType value and a layer number.
// Primary template: get<Expr>() evaluates to one more than Expr::NHLAYER.
// Neither FT nor LAYER is read inside this primary template; they only
// determine which (specialized) variant of NHLayer is selected.
template <ForType FT, size_t LAYER>
struct NHLayer
{
    // Expr must expose a static constant named NHLAYER.
    template <typename Expr>
    static constexpr size_t get() { return Expr::NHLAYER + 1; }
};
// Partial specialization for ForType::HIDDEN, valid for every LAYER value.
// get<Expr>() is always 0 here; the Expr argument is accepted only so the
// call syntax matches the primary template and is otherwise ignored.
template <size_t LAYER>
struct NHLayer<ForType::HIDDEN,LAYER>
{
    template <typename Expr>
    static constexpr size_t get() { return 0; }
};
// Full specialization for ForType::DEFAULT with LAYER == 1.
// get<Expr>() forwards Expr::LAYER unchanged (no +1 is applied).
// NOTE(review): this variant reads Expr::LAYER, whereas the primary
// template reads Expr::NHLAYER -- confirm the difference is intended.
template <>
struct NHLayer<ForType::DEFAULT,1>
{
    // Expr must expose a static constant named LAYER.
    template <typename Expr>
    static constexpr size_t get() { return Expr::LAYER; }
};
template <class IndexClass, class Expr, ForType FT, size_t DIV>
class For : public ExpressionBase
{
@ -430,11 +462,12 @@ namespace MultiArrayHelper
public:
typedef ExpressionBase EB;
static constexpr size_t LAYER = Expr::LAYER + 1;
static constexpr size_t SIZE = Expr::SIZE;
static constexpr size_t MAX = RangeType::SIZE / DIV;
static constexpr size_t NHLAYER = (FT == ForType::HIDDEN) ? 0 : Expr::NHLAYER + 1;
For(const For& in) = default;
For& operator=(const For& in) = default;
For(For&& in) = default;
@ -455,8 +488,10 @@ namespace MultiArrayHelper
virtual std::intptr_t vI() const override final
{
if(mStep == 1 and LAYER == 1 and mMax % DIV == 0){
VCHECK(LAYER);
if(mStep == 1 and NHLAYER == 1 and mMax % DIV == 0){
//if(mStep == 1 and mMax % DIV == 0){
//VCHECK(LAYER);
//VCHECK(NHLAYER);
return reinterpret_cast<std::intptr_t>(mIndPtr);
}
return mExpr.vI();
@ -465,9 +500,9 @@ namespace MultiArrayHelper
template <size_t VS>
auto vec() const
{
typedef typename MkVFor<LAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
template type<VS,IndexClass,decltype(MkVExpr<LAYER>::template mk<VS>(mExpr)),FT> oType;
return oType(mIndPtr,mStep,MkVExpr<LAYER>::template mk<VS>(mExpr));
typedef typename MkVFor<NHLAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
template type<VS,IndexClass,decltype(MkVExpr<NHLAYER>::template mk<VS>(mExpr)),FT> oType;
return oType(mIndPtr,mStep,MkVExpr<NHLAYER>::template mk<VS>(mExpr));
}
inline void operator()(size_t mlast, DExt last) override final;
@ -509,6 +544,7 @@ namespace MultiArrayHelper
static constexpr size_t LAYER = Expr::LAYER + 1;
static constexpr size_t SIZE = Expr::SIZE;
static constexpr size_t MAX = RangeType::SIZE / DIV;
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
PFor(const PFor& in) = default;
PFor& operator=(const PFor& in) = default;
@ -524,8 +560,10 @@ namespace MultiArrayHelper
//virtual size_t divResid() const override final { return mMax % DIV + MkVExpr<LAYER>::divResid(mExpr); }
virtual std::intptr_t vI() const override final
{
if(mStep == 1 and LAYER == 1 and mMax % DIV == 0){
VCHECK(LAYER);
if(mStep == 1 and NHLAYER == 1 and mMax % DIV == 0){
//if(mStep == 1 and mMax % DIV == 0){
//VCHECK(LAYER);
//VCHECK(LAYER);
return reinterpret_cast<std::intptr_t>(mIndPtr);
}
return mExpr.vI();
@ -534,9 +572,9 @@ namespace MultiArrayHelper
template <size_t VS>
auto vec() const
{
typedef typename MkVFor<LAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
template ptype<VS,IndexClass,decltype(MkVExpr<LAYER>::template mk<VS>(mExpr))> oType;
return oType(mIndPtr,mStep,MkVExpr<LAYER>::template mk<VS>(mExpr));
typedef typename MkVFor<NHLAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
template ptype<VS,IndexClass,decltype(MkVExpr<NHLAYER>::template mk<VS>(mExpr))> oType;
return oType(mIndPtr,mStep,MkVExpr<NHLAYER>::template mk<VS>(mExpr));
}
virtual std::shared_ptr<ExpressionBase> deepCopy() const override final
@ -574,6 +612,7 @@ namespace MultiArrayHelper
static constexpr size_t LAYER = 0;
static constexpr size_t SIZE = 0;
static constexpr size_t NHLAYER = 0;
DynamicExpression(const DynamicExpression& in) :
mThreadId(omp_get_thread_num()),
@ -649,6 +688,7 @@ namespace MultiArrayHelper
static constexpr size_t LAYER = Expr::LAYER + 1;
static constexpr size_t SIZE = Expr::SIZE;
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
ExpressionHolder(const ExpressionHolder& in) = default;
ExpressionHolder(ExpressionHolder&& in) = default;

View file

@ -174,8 +174,8 @@ namespace {
auto beta = MAT::getIndex<SR>();
auto gamma = MAT::getIndex<SR>();
auto delta = MAT::getIndex<SR>();
//auto deltap = MAT::getIndex<SR>();
auto deltap = MAT::getIndex<GenSingleRange<size_t,SpaceType::NONE,1>>();
auto deltap = MAT::getIndex<SR>();
//auto deltap = MAT::getIndex<GenSingleRange<size_t,SpaceType::NONE,1>>();
auto mix = MAT::mkMIndex( jj, alpha, beta, gamma );
@ -217,8 +217,8 @@ namespace {
auto begin = std::chrono::system_clock::now();
//for(size_t i = 0; i != os; ++i){
//res1(ii ,delta, deltap).par() += ma(ii, delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
tcast<v256>(res1)(ii ,delta, deltap).par() += tcast<v256>(ma)(delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
res1(ii ,delta, deltap).par() += ma(delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
//tcast<v256>(res1)(ii ,delta, deltap).par() += tcast<v256>(ma)(delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
//}
auto end = std::chrono::system_clock::now();
std::cout << "MultiArray time: " << std::chrono::duration<double>(end-begin).count()