auto-vectorization works
This commit is contained in:
parent
269ff69ec3
commit
867c20959a
5 changed files with 72 additions and 28 deletions
|
@ -57,6 +57,7 @@ namespace MultiArrayTools
|
|||
//typedef SingleIndex<typename Op::value_type,STYPE> OIType;
|
||||
static constexpr size_t LAYER = Expr::LAYER + 1;
|
||||
static constexpr size_t SIZE = Expr::SIZE + Op::SIZE;
|
||||
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
|
||||
|
||||
private:
|
||||
OpExpr() = default;
|
||||
|
|
|
@ -346,7 +346,7 @@ namespace MultiArrayTools
|
|||
template <typename V, class ET>
|
||||
inline const V& ConstOperationRoot<T,Ranges...>::vget(ET pos) const
|
||||
{
|
||||
VCHECK(pos.val());
|
||||
//VCHECK(pos.val());
|
||||
return *(reinterpret_cast<const V*>(mDataPtr+pos.val()));
|
||||
}
|
||||
|
||||
|
@ -665,7 +665,7 @@ namespace MultiArrayTools
|
|||
template <typename V, class ET>
|
||||
inline V& OperationRoot<T,Ranges...>::vget(ET pos) const
|
||||
{
|
||||
VCHECK(pos.val());
|
||||
//VCHECK(pos.val());
|
||||
return *(reinterpret_cast<V*>(mDataPtr+pos.val()));
|
||||
}
|
||||
|
||||
|
@ -740,12 +740,12 @@ namespace MultiArrayTools
|
|||
template <class IOp, class OpClass>
|
||||
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in) const
|
||||
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
||||
(mOrigDataPtr,*this,in))))
|
||||
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>())
|
||||
|
||||
{
|
||||
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
||||
return mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
||||
(mOrigDataPtr,*this,in)));
|
||||
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>();
|
||||
}
|
||||
|
||||
template <typename T, class... Ranges>
|
||||
|
@ -763,11 +763,11 @@ namespace MultiArrayTools
|
|||
template <class IOp, class OpClass, class Index>
|
||||
auto ParallelOperationRoot<T,Ranges...>::asx(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||
(mOrigDataPtr,*this,in))))
|
||||
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>())
|
||||
{
|
||||
static_assert( OpClass::SIZE == decltype(in.rootSteps())::SIZE, "Ext Size mismatch" );
|
||||
return i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||
(mOrigDataPtr,*this,in)));
|
||||
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>();
|
||||
}
|
||||
|
||||
template <typename T, class... Ranges>
|
||||
|
@ -814,7 +814,7 @@ namespace MultiArrayTools
|
|||
template <class OpClass>
|
||||
ParallelOperationRoot<T,Ranges...>& ParallelOperationRoot<T,Ranges...>::operator=(const OpClass& in)
|
||||
{
|
||||
assign(in)();
|
||||
VExec<OpClass::VABLE>::template exec<identity>(*this,in);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
@ -822,7 +822,7 @@ namespace MultiArrayTools
|
|||
template <class OpClass>
|
||||
ParallelOperationRoot<T,Ranges...>& ParallelOperationRoot<T,Ranges...>::operator+=(const OpClass& in)
|
||||
{
|
||||
plus(in)();
|
||||
VExec<OpClass::VABLE>::template exec<xxxplus>(*this,in);
|
||||
return *this;
|
||||
}
|
||||
|
||||
|
|
|
@ -259,7 +259,7 @@ namespace MultiArrayTools
|
|||
// Applies VFunc<F>::selfApply to one element of the target buffer.
//   t   - reference to the target data pointer; the element at t+pos is
//         reinterpreted as value_type and passed as the first argument.
//   pos - element offset added to t.
//   op  - source operation; op.vget<value_type>(e) supplies the second argument.
//   e   - position/extension object forwarded unchanged to op.vget.
// T, F and value_type come from the enclosing scope, which is not visible here.
// NOTE(review): both an active VCHECK(pos) and a commented-out copy appear
// below; this looks like the pre- and post-change lines of a diff shown
// together — confirm which one is live.
template <typename Op, class ExtType>
|
||||
static inline void f(T*& t, size_t pos, const Op& op, ExtType e)
|
||||
{
|
||||
VCHECK(pos);
|
||||
//VCHECK(pos);
|
||||
// Target and source are both accessed as value_type.
VFunc<F>::selfApply(*reinterpret_cast<value_type*>(t+pos),op.template vget<value_type>(e));
|
||||
}
|
||||
};
|
||||
|
@ -332,6 +332,7 @@ namespace MultiArrayTools
|
|||
public:
|
||||
|
||||
static constexpr size_t LAYER = 0;
|
||||
static constexpr size_t NHLAYER = 0;
|
||||
static constexpr size_t SIZE = Target::SIZE + OpClass::SIZE;
|
||||
typedef decltype(mTar.rootSteps(0).extend( mSec.rootSteps(0) )) ExtType;
|
||||
|
||||
|
@ -371,7 +372,8 @@ namespace MultiArrayTools
|
|||
|
||||
public:
|
||||
static constexpr size_t LAYER = 0;
|
||||
static constexpr size_t SIZE = RootSum<Ops...>::SIZE;
|
||||
static constexpr size_t NHLAYER = 0;
|
||||
static constexpr size_t SIZE = RootSum<Ops...>::SIZE;
|
||||
typedef decltype(RootSumN<sizeof...(Ops)-1>::rootSteps(mOps,0) ) ExtType;
|
||||
|
||||
MOp(const Ops&... exprs);
|
||||
|
@ -410,6 +412,7 @@ namespace MultiArrayTools
|
|||
public:
|
||||
|
||||
static constexpr size_t LAYER = 0;
|
||||
static constexpr size_t NHLAYER = 0;
|
||||
static constexpr size_t SIZE = OpClass::SIZE + NextExpr::SIZE;
|
||||
typedef decltype(mSec.rootSteps(0).extend( mNExpr.rootSteps(0) ) ) ExtType;
|
||||
|
||||
|
@ -699,7 +702,7 @@ namespace MultiArrayTools
|
|||
template <class IOp, class OpClass>
|
||||
auto asx(const OpClass& in) const
|
||||
-> decltype(mIndex.pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass,OpIndexAff::TARGET>
|
||||
(mOrigDataPtr,*this,in))));
|
||||
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>());
|
||||
|
||||
template <class IOp, class OpClass>
|
||||
auto asxExpr(const OpClass& in) const
|
||||
|
@ -708,7 +711,7 @@ namespace MultiArrayTools
|
|||
template <class IOp, class OpClass, class Index>
|
||||
auto asx(const OpClass& in, const std::shared_ptr<Index>& i) const
|
||||
-> decltype(i->pifor(1,in.loop(AssignmentExpr<T,IOp,ParallelOperationRoot<T,Ranges...>,OpClass>
|
||||
(mOrigDataPtr,*this,in))));
|
||||
(mOrigDataPtr,*this,in))).template vec<IOp::VSIZE>());
|
||||
|
||||
template <class OpClass>
|
||||
auto assign(const OpClass& in) const
|
||||
|
|
|
@ -271,6 +271,7 @@ namespace MultiArrayHelper
|
|||
|
||||
static constexpr size_t LAYER = Expr::LAYER + 1;
|
||||
static constexpr size_t SIZE = Expr::SIZE;
|
||||
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
|
||||
|
||||
SingleExpression(const SingleExpression& in) = default;
|
||||
SingleExpression& operator=(const SingleExpression& in) = default;
|
||||
|
@ -326,6 +327,7 @@ namespace MultiArrayHelper
|
|||
|
||||
static constexpr size_t LAYER = Expr::LAYER + 1;
|
||||
static constexpr size_t SIZE = Expr::SIZE + 1;
|
||||
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
|
||||
|
||||
SubExpr(const SubExpr& in) = default;
|
||||
SubExpr& operator=(const SubExpr& in) = default;
|
||||
|
@ -410,6 +412,36 @@ namespace MultiArrayHelper
|
|||
}
|
||||
};
|
||||
|
||||
// Compile-time helper: NHLayer<FT,LAYER>::get<Expr>() computes an NHLAYER
// value from the wrapped expression type Expr.
// Primary template (generic case): returns Expr::NHLAYER + 1. Neither FT nor
// LAYER is used in the body; they only select among the specializations.
// NOTE(review): "NH" presumably means "non-hidden" (cf. the ForType::HIDDEN
// specialization, which returns 0) — confirm.
template <ForType FT, size_t LAYER>
|
||||
struct NHLayer
|
||||
{
|
||||
template <class Expr>
|
||||
static constexpr size_t get()
|
||||
{
|
||||
return Expr::NHLAYER + 1;
|
||||
}
|
||||
};
|
||||
|
||||
// Partial specialization of NHLayer for ForType::HIDDEN, any LAYER:
// get<Expr>() always returns 0, independent of Expr.
// This matches the HIDDEN branch of the inline NHLAYER definition in For,
// (FT == ForType::HIDDEN) ? 0 : Expr::NHLAYER + 1.
template <size_t LAYER>
|
||||
struct NHLayer<ForType::HIDDEN,LAYER>
|
||||
{
|
||||
template <class Expr>
|
||||
static constexpr size_t get()
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
};
|
||||
|
||||
// Full specialization of NHLayer for ForType::DEFAULT with LAYER == 1:
// get<Expr>() returns Expr::LAYER (not Expr::NHLAYER + 1 as the generic
// case does).
// NOTE(review): the inline NHLAYER definition in For has no equivalent of this
// case — confirm whether the difference is intended.
template <>
|
||||
struct NHLayer<ForType::DEFAULT,1>
|
||||
{
|
||||
template <class Expr>
|
||||
static constexpr size_t get()
|
||||
{
|
||||
return Expr::LAYER;
|
||||
}
|
||||
};
|
||||
|
||||
template <class IndexClass, class Expr, ForType FT, size_t DIV>
|
||||
class For : public ExpressionBase
|
||||
{
|
||||
|
@ -430,11 +462,12 @@ namespace MultiArrayHelper
|
|||
|
||||
public:
|
||||
typedef ExpressionBase EB;
|
||||
|
||||
|
||||
static constexpr size_t LAYER = Expr::LAYER + 1;
|
||||
static constexpr size_t SIZE = Expr::SIZE;
|
||||
static constexpr size_t MAX = RangeType::SIZE / DIV;
|
||||
|
||||
static constexpr size_t NHLAYER = (FT == ForType::HIDDEN) ? 0 : Expr::NHLAYER + 1;
|
||||
|
||||
For(const For& in) = default;
|
||||
For& operator=(const For& in) = default;
|
||||
For(For&& in) = default;
|
||||
|
@ -455,8 +488,10 @@ namespace MultiArrayHelper
|
|||
|
||||
virtual std::intptr_t vI() const override final
|
||||
{
|
||||
if(mStep == 1 and LAYER == 1 and mMax % DIV == 0){
|
||||
VCHECK(LAYER);
|
||||
if(mStep == 1 and NHLAYER == 1 and mMax % DIV == 0){
|
||||
//if(mStep == 1 and mMax % DIV == 0){
|
||||
//VCHECK(LAYER);
|
||||
//VCHECK(NHLAYER);
|
||||
return reinterpret_cast<std::intptr_t>(mIndPtr);
|
||||
}
|
||||
return mExpr.vI();
|
||||
|
@ -465,9 +500,9 @@ namespace MultiArrayHelper
|
|||
// Builds and returns the VS-wide counterpart of this loop expression.
// The result type oType is MkVFor<N, B>::type<VS, IndexClass, VExpr, FT>, where
//   B     is true when RangeType::SIZE is a multiple of DIV or equals
//         static_cast<size_t>(-1),
//   VExpr is the type of MkVExpr<N>::mk<VS>(mExpr).
// The returned object is constructed from mIndPtr, mStep and the converted
// inner expression.
// NOTE(review): two variants appear below, one with N = LAYER and one with
// N = NHLAYER. This looks like the pre- and post-change lines of a diff shown
// together; as written the second typedef/return follows an unconditional
// return — confirm which variant is live.
template <size_t VS>
|
||||
auto vec() const
|
||||
{
|
||||
typedef typename MkVFor<LAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
|
||||
template type<VS,IndexClass,decltype(MkVExpr<LAYER>::template mk<VS>(mExpr)),FT> oType;
|
||||
return oType(mIndPtr,mStep,MkVExpr<LAYER>::template mk<VS>(mExpr));
|
||||
typedef typename MkVFor<NHLAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
|
||||
template type<VS,IndexClass,decltype(MkVExpr<NHLAYER>::template mk<VS>(mExpr)),FT> oType;
|
||||
return oType(mIndPtr,mStep,MkVExpr<NHLAYER>::template mk<VS>(mExpr));
|
||||
}
|
||||
|
||||
inline void operator()(size_t mlast, DExt last) override final;
|
||||
|
@ -509,6 +544,7 @@ namespace MultiArrayHelper
|
|||
static constexpr size_t LAYER = Expr::LAYER + 1;
|
||||
static constexpr size_t SIZE = Expr::SIZE;
|
||||
static constexpr size_t MAX = RangeType::SIZE / DIV;
|
||||
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
|
||||
|
||||
PFor(const PFor& in) = default;
|
||||
PFor& operator=(const PFor& in) = default;
|
||||
|
@ -524,8 +560,10 @@ namespace MultiArrayHelper
|
|||
//virtual size_t divResid() const override final { return mMax % DIV + MkVExpr<LAYER>::divResid(mExpr); }
|
||||
virtual std::intptr_t vI() const override final
|
||||
{
|
||||
if(mStep == 1 and LAYER == 1 and mMax % DIV == 0){
|
||||
VCHECK(LAYER);
|
||||
if(mStep == 1 and NHLAYER == 1 and mMax % DIV == 0){
|
||||
//if(mStep == 1 and mMax % DIV == 0){
|
||||
//VCHECK(LAYER);
|
||||
//VCHECK(LAYER);
|
||||
return reinterpret_cast<std::intptr_t>(mIndPtr);
|
||||
}
|
||||
return mExpr.vI();
|
||||
|
@ -534,9 +572,9 @@ namespace MultiArrayHelper
|
|||
// Builds and returns the VS-wide counterpart of this parallel loop expression.
// The result type oType is MkVFor<N, B>::ptype<VS, IndexClass, VExpr>, where
//   B     is true when RangeType::SIZE is a multiple of DIV or equals
//         static_cast<size_t>(-1),
//   VExpr is the type of MkVExpr<N>::mk<VS>(mExpr).
// The returned object is constructed from mIndPtr, mStep and the converted
// inner expression.
// NOTE(review): two variants appear below, one with N = LAYER and one with
// N = NHLAYER. This looks like the pre- and post-change lines of a diff shown
// together; as written the second typedef/return follows an unconditional
// return — confirm which variant is live.
template <size_t VS>
|
||||
auto vec() const
|
||||
{
|
||||
typedef typename MkVFor<LAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
|
||||
template ptype<VS,IndexClass,decltype(MkVExpr<LAYER>::template mk<VS>(mExpr))> oType;
|
||||
return oType(mIndPtr,mStep,MkVExpr<LAYER>::template mk<VS>(mExpr));
|
||||
typedef typename MkVFor<NHLAYER,RangeType::SIZE % DIV == 0 or RangeType::SIZE == static_cast<size_t>(-1)>::
|
||||
template ptype<VS,IndexClass,decltype(MkVExpr<NHLAYER>::template mk<VS>(mExpr))> oType;
|
||||
return oType(mIndPtr,mStep,MkVExpr<NHLAYER>::template mk<VS>(mExpr));
|
||||
}
|
||||
|
||||
virtual std::shared_ptr<ExpressionBase> deepCopy() const override final
|
||||
|
@ -574,6 +612,7 @@ namespace MultiArrayHelper
|
|||
|
||||
static constexpr size_t LAYER = 0;
|
||||
static constexpr size_t SIZE = 0;
|
||||
static constexpr size_t NHLAYER = 0;
|
||||
|
||||
DynamicExpression(const DynamicExpression& in) :
|
||||
mThreadId(omp_get_thread_num()),
|
||||
|
@ -649,6 +688,7 @@ namespace MultiArrayHelper
|
|||
|
||||
static constexpr size_t LAYER = Expr::LAYER + 1;
|
||||
static constexpr size_t SIZE = Expr::SIZE;
|
||||
static constexpr size_t NHLAYER = Expr::NHLAYER + 1;
|
||||
|
||||
ExpressionHolder(const ExpressionHolder& in) = default;
|
||||
ExpressionHolder(ExpressionHolder&& in) = default;
|
||||
|
|
|
@ -174,8 +174,8 @@ namespace {
|
|||
auto beta = MAT::getIndex<SR>();
|
||||
auto gamma = MAT::getIndex<SR>();
|
||||
auto delta = MAT::getIndex<SR>();
|
||||
//auto deltap = MAT::getIndex<SR>();
|
||||
auto deltap = MAT::getIndex<GenSingleRange<size_t,SpaceType::NONE,1>>();
|
||||
auto deltap = MAT::getIndex<SR>();
|
||||
//auto deltap = MAT::getIndex<GenSingleRange<size_t,SpaceType::NONE,1>>();
|
||||
|
||||
auto mix = MAT::mkMIndex( jj, alpha, beta, gamma );
|
||||
|
||||
|
@ -217,8 +217,8 @@ namespace {
|
|||
|
||||
auto begin = std::chrono::system_clock::now();
|
||||
//for(size_t i = 0; i != os; ++i){
|
||||
//res1(ii ,delta, deltap).par() += ma(ii, delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
|
||||
tcast<v256>(res1)(ii ,delta, deltap).par() += tcast<v256>(ma)(delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
|
||||
res1(ii ,delta, deltap).par() += ma(delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
|
||||
//tcast<v256>(res1)(ii ,delta, deltap).par() += tcast<v256>(ma)(delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
|
||||
//}
|
||||
auto end = std::chrono::system_clock::now();
|
||||
std::cout << "MultiArray time: " << std::chrono::duration<double>(end-begin).count()
|
||||
|
|
Loading…
Reference in a new issue