Enable multithreading

This commit is contained in:
Christian Zimmermann 2019-01-15 17:41:43 +01:00
parent a1d843c01b
commit 6d1682efa2
12 changed files with 148 additions and 38 deletions

View file

@ -287,6 +287,15 @@ namespace MultiArrayTools
(step, mIPack, mBlockSizes, OpExpr<MapF,IndexPack,Exprs>
( range()->map(), mIPack, mOutIndex, step, exs ) );
}
// pifor for MapIndex: forwards to ifor(step, exs) unchanged, so the loop
// expression returned here is exactly the one ifor builds.
template <class MapF, class... Indices>
template <class Exprs>
auto MapIndex<MapF,Indices...>::pifor(size_t step, Exprs exs) const
-> decltype(ifor(step, exs))
{
return ifor(step, exs);
}
/*
template <class MapF, class... Indices>
template <class Exprs>

View file

@ -92,6 +92,7 @@ namespace MultiArrayTools
static constexpr size_t totalDim() { return mkTotalDim<Indices...>(); }
static constexpr SpaceType STYPE = SpaceType::ANY;
static constexpr bool PARALLEL = false;
private:
@ -171,6 +172,10 @@ namespace MultiArrayTools
(step, mIPack, mBlockSizes, OpExpr<MapF,IndexPack,Exprs>( range()->map(), mIPack, mOutIndex, step, exs ) ) );
// first step arg not used!
// pifor entry point of MapIndex; its return type is declared to be exactly
// the type of ifor(step, exs).
template <class Exprs>
auto pifor(size_t step, Exprs exs) const
-> decltype(ifor(step, exs)); // NO MULTITHREADING
/*
template <class Exprs>
auto iforh(Exprs exs) const

View file

@ -139,9 +139,9 @@ namespace MultiArrayTools
// Constructor taking the target array: stores the right-hand operation, the
// array's data pointer (ma.data()), the index and the doParallel flag, then
// runs the assignment right away through performAssignment(0).
template <typename T, class AOp, class OpClass, class... Ranges>
OperationMaster<T,AOp,OpClass,Ranges...>::
OperationMaster(MutableMultiArrayBase<T,Ranges...>& ma, const OpClass& second,
IndexType& index) :
IndexType& index, bool doParallel) :
mSecond(second), mDataPtr(ma.data()),
mIndex(index)
mIndex(index), mDoParallel(doParallel)
{
performAssignment(0);
}
@ -149,9 +149,9 @@ namespace MultiArrayTools
// Constructor taking a raw data pointer: stores the right-hand operation, the
// pointer, the index and the doParallel flag, then runs the assignment right
// away through performAssignment(0).
template <typename T, class AOp, class OpClass, class... Ranges>
OperationMaster<T,AOp,OpClass,Ranges...>::
OperationMaster(T* data, const OpClass& second,
IndexType& index) :
IndexType& index, bool doParallel) :
mSecond(second), mDataPtr(data),
mIndex(index)
mIndex(index), mDoParallel(doParallel)
{
performAssignment(0);
}
@ -160,8 +160,14 @@ namespace MultiArrayTools
void OperationMaster<T,AOp,OpClass,Ranges...>::performAssignment(std::intptr_t blockIndexNum)
{
// Wraps this object and mSecond into an AssignmentExpr and executes it over
// mIndex. blockIndexNum is not read anywhere in this body.
AssignmentExpr ae(*this, mSecond); // Expression to be executed within loop
// NOTE(review): this listing contains an unconditional serial loop here AND the
// mDoParallel branch below. If both are compiled the assignment is executed
// twice -- confirm that only the branch form is meant to remain.
const auto loop = mIndex.ifor( 1, mSecond.loop(ae) );
loop(); // execute overall loop(s) and so internal hidden loops and so the inherited expressions
// mDoParallel is the flag handed to the constructor; it selects between the
// index's pifor and ifor loop builders. Both are called with step 1.
if(mDoParallel){
const auto ploop = mIndex.pifor( 1, mSecond.loop(ae) );
ploop(); // execute overall loop(s) and so internal hidden loops and so the inherited expressions
}
else {
const auto loop = mIndex.ifor( 1, mSecond.loop(ae) );
loop(); // execute overall loop(s) and so internal hidden loops and so the inherited expressions
}
}
template <typename T, class AOp, class OpClass, class... Ranges>
@ -355,14 +361,14 @@ namespace MultiArrayTools
template <class OpClass>
OperationMaster<T,SelfIdentity<T>,OpClass,Ranges...> OperationRoot<T,Ranges...>::operator=(const OpClass& in)
{
// The OperationMaster constructor itself performs the assignment (it calls
// performAssignment(0)); the object is returned afterwards. The four-argument
// form additionally forwards this root's mDoParallel flag.
return OperationMaster<T,SelfIdentity<T>,OpClass,Ranges...>(mDataPtr, in, mIndex);
return OperationMaster<T,SelfIdentity<T>,OpClass,Ranges...>(mDataPtr, in, mIndex, mDoParallel);
}
// Accumulating assignment: same as operator= but with plus<T> as the
// OperationMaster's AOp parameter.
template <typename T, class... Ranges>
template <class OpClass>
OperationMaster<T,plus<T>,OpClass,Ranges...> OperationRoot<T,Ranges...>::operator+=(const OpClass& in)
{
// The OperationMaster constructor performs the assignment; the four-argument
// form additionally forwards this root's mDoParallel flag.
return OperationMaster<T,plus<T>,OpClass,Ranges...>(mDataPtr, in, mIndex);
return OperationMaster<T,plus<T>,OpClass,Ranges...>(mDataPtr, in, mIndex, mDoParallel);
}
template <typename T, class... Ranges>
@ -372,6 +378,13 @@ namespace MultiArrayTools
return operator=<OperationRoot<T,Ranges...> >(in);
}
// Sets mDoParallel on this operation root and returns *this so the call can
// be chained in front of an assignment, e.g. res(i,j).par() += ...;
// The flag is only ever set to true here; nothing in this function resets it.
template <typename T, class... Ranges>
OperationRoot<T,Ranges...>& OperationRoot<T,Ranges...>::par()
{
mDoParallel = true;
return *this;
}
template <typename T, class... Ranges>
template <class ET>
inline T OperationRoot<T,Ranges...>::get(ET pos) const

View file

@ -135,10 +135,10 @@ namespace MultiArrayTools
//typedef typename MultiRange<Ranges...>::IndexType IndexType;
OperationMaster(MutableMultiArrayBase<T,Ranges...>& ma, const OpClass& second,
IndexType& index);
IndexType& index, bool doParallel = false);
OperationMaster(T* data, const OpClass& second,
IndexType& index);
IndexType& index, bool doParallel = false);
inline void set(size_t pos, T val) { mDataPtr[pos] = AOp::apply(mDataPtr[pos],val); }
@ -152,6 +152,7 @@ namespace MultiArrayTools
//MutableMultiArrayBase<T,Ranges...>& mArrayRef;
T* mDataPtr;
IndexType mIndex;
bool mDoParallel;
};
@ -292,6 +293,8 @@ namespace MultiArrayTools
OperationMaster<T,SelfIdentity<T>,OperationRoot,Ranges...> operator=(const OperationRoot& in);
OperationRoot& par();
template <class ET>
inline T get(ET pos) const;
@ -315,6 +318,7 @@ namespace MultiArrayTools
T* mDataPtr;
mutable IndexType mIndex;
mutable size_t mOff = 0;
bool mDoParallel = false;
};
template <typename T>

View file

@ -33,6 +33,7 @@ namespace MultiArrayTools
static constexpr size_t totalDim() { return mkTotalDim<Indices...>(); }
static constexpr SpaceType STYPE = SpaceType::ANY;
static constexpr bool PARALLEL = std::tuple_element<0,std::tuple<Indices...>>::type::PARALLEL;
template <typename X>
using CIX = ContainerIndex<X,Indices...>;
@ -155,6 +156,10 @@ namespace MultiArrayTools
auto iforh(size_t step, Exprs exs) const
-> decltype(RPackNum<sizeof...(Indices)-1>::mkForh(step, mIPack, mBlockSizes, exs));
// Returns the loop expression that RPackNum<sizeof...(Indices)-1>::mkPFor
// builds from step, this index's pack (mIPack), its block sizes (mBlockSizes)
// and the expressions exs.
template <class Exprs>
auto pifor(size_t step, Exprs exs) const
-> decltype(RPackNum<sizeof...(Indices)-1>::mkPFor(step, mIPack, mBlockSizes, exs));
std::intptr_t container() const;
ContainerIndex& format(const std::array<size_t,sizeof...(Indices)+1>& blocks);
@ -511,6 +516,14 @@ namespace MultiArrayTools
return RPackNum<sizeof...(Indices)-1>::mkForh(step, mIPack, mBlockSizes, exs);
}
// Delegates loop construction to RPackNum<sizeof...(Indices)-1>::mkPFor,
// passing step, the index pack, the block sizes and the expressions through
// unchanged. The trailing return type repeats the same call so it always
// matches what is returned.
template <typename T, class... Indices>
template <class Exprs>
auto ContainerIndex<T,Indices...>::pifor(size_t step, Exprs exs) const
-> decltype(RPackNum<sizeof...(Indices)-1>::mkPFor(step, mIPack, mBlockSizes, exs))
{
return RPackNum<sizeof...(Indices)-1>::mkPFor(step, mIPack, mBlockSizes, exs);
}
template <typename T, class... Indices>
std::intptr_t ContainerIndex<T,Indices...>::container() const
{

View file

@ -425,6 +425,13 @@ namespace MultiArrayTools
}
}
// pifor for DynamicIndex: returns whatever ifor(step, ex) returns, i.e. the
// result is identical to calling ifor directly.
template <class EC>
template <class Expr>
ExpressionHolder<Expr> DynamicIndex<EC>::pifor(size_t step, Expr ex) const
{
return ifor(step, ex); // no multithreading here at the moment...
}
/***********************
* DynamicRange *
***********************/

View file

@ -237,6 +237,9 @@ namespace MultiArrayTools
template <class Expr>
ExpressionHolder<Expr> iforh(size_t step, Expr ex) const;
// pifor entry point; takes the same (step, ex) arguments and has the same
// ExpressionHolder<Expr> return type as iforh declared above.
template <class Expr>
ExpressionHolder<Expr> pifor(size_t step, Expr ex) const;
};

View file

@ -42,6 +42,7 @@ namespace MultiArrayTools
static constexpr size_t totalDim() { return mkTotalDim<Indices...>(); }
static constexpr SpaceType STYPE = SpaceType::ANY;
static constexpr bool PARALLEL = std::tuple_element<0,std::tuple<Indices...>>::type::PARALLEL;
private:
@ -122,6 +123,11 @@ namespace MultiArrayTools
auto iforh(size_t step, Exprs exs) const
-> decltype(RPackNum<sizeof...(Indices)-1>::mkForh(step, mIPack, mBlockSizes, exs));
// Returns the loop expression that RPackNum<sizeof...(Indices)-1>::mkPFor
// builds from step, this index's pack (mIPack), its block sizes (mBlockSizes)
// and the expressions exs.
template <class Exprs>
auto pifor(size_t step, Exprs exs) const
-> decltype(RPackNum<sizeof...(Indices)-1>::mkPFor(step, mIPack, mBlockSizes, exs));
};
/*************************
@ -459,6 +465,14 @@ namespace MultiArrayTools
return RPackNum<sizeof...(Indices)-1>::mkForh(step, mIPack, mBlockSizes, exs);
}
// Delegates loop construction to RPackNum<sizeof...(Indices)-1>::mkPFor,
// passing step, the index pack, the block sizes and the expressions through
// unchanged. The trailing return type repeats the same call so it always
// matches what is returned.
template <class... Indices>
template <class Exprs>
auto MultiIndex<Indices...>::pifor(size_t step, Exprs exs) const
-> decltype(RPackNum<sizeof...(Indices)-1>::mkPFor(step, mIPack, mBlockSizes, exs))
{
return RPackNum<sizeof...(Indices)-1>::mkPFor(step, mIPack, mBlockSizes, exs);
}
/*************************
* MultiRangeFactory *
*************************/

View file

@ -299,6 +299,17 @@ namespace MultiArrayHelper
->iforh( step*std::get<NN+1>(ba), RPackNum<N-1>::mkForh(step, ipack, ba, exs) );
}
// Builds a loop nest in which only the pack element at position
// tuple_size<IndexPack> - N - 1 is entered through pifor; everything nested
// inside it is built with RPackNum<N-1>::mkFor.
// The 0 passed to pifor inside decltype is only a stand-in for the step
// argument: decltype does not evaluate the call, it only needs its type.
template <class IndexPack, class BlockArray, class Exprs>
static auto mkPFor(size_t step, const IndexPack& ipack, const BlockArray& ba, Exprs exs)
-> decltype(std::get<std::tuple_size<IndexPack>::value-N-1>(ipack)
->pifor( 0, RPackNum<N-1>::mkFor(step, ipack, ba, exs) ) )
{
// NN: position in ipack of the index handled at this recursion level.
constexpr size_t NN = std::tuple_size<IndexPack>::value-N-1;
// The step handed to this index is step scaled by the block-array entry at NN+1.
return std::get<NN>(ipack)
->pifor( step*std::get<NN+1>(ba), RPackNum<N-1>::mkFor(step, ipack, ba, exs) );
// mkFor is correct here, because we want to multithread only the FIRST index!!
}
template <class Index>
static inline void getStepSizeX(const Index& ii, std::intptr_t j, size_t& ss, size_t& sx)
{
@ -562,6 +573,16 @@ namespace MultiArrayHelper
->iforh( step*std::get<NN+1>(ba), exs);
}
// Terminal form of mkPFor: calls pifor on the last element of the pack
// (position tuple_size<IndexPack> - 1) and hands it exs directly, with no
// further nested loop built here.
// The 0 passed to pifor inside decltype is only a stand-in for the step
// argument: decltype does not evaluate the call, it only needs its type.
template <class IndexPack, class BlockArray, class Exprs>
static auto mkPFor(size_t step, const IndexPack& ipack, const BlockArray& ba, Exprs exs)
-> decltype(std::get<std::tuple_size<IndexPack>::value-1>(ipack)
->pifor(0,exs) )
{
// NN: position of the last index in ipack.
constexpr size_t NN = std::tuple_size<IndexPack>::value-1;
// The step handed to this index is step scaled by the block-array entry at NN+1.
return std::get<NN>(ipack)
->pifor( step*std::get<NN+1>(ba), exs);
}
template <class Index>
static inline void getStepSizeX(const Index& ii, std::intptr_t j, size_t& ss, size_t& sx)
{

View file

@ -47,6 +47,7 @@ namespace MultiArrayTools
static constexpr size_t sDim() { return 1; }
static constexpr SpaceType STYPE = TYPE;
static constexpr bool PARALLEL = true;
// ==== >>>>> STATIC POLYMORPHISM <<<<< ====
@ -89,6 +90,10 @@ namespace MultiArrayTools
auto iforh(size_t step, Expr ex) const
-> For<SingleIndex<U,TYPE>,Expr,ForType::HIDDEN>;
// Returns a PFor loop object over this index type wrapping ex; counterpart of
// iforh above, which returns a For<..., ForType::HIDDEN> instead.
template <class Expr>
auto pifor(size_t step, Expr ex) const
-> PFor<SingleIndex<U,TYPE>,Expr>;
private:
std::shared_ptr<RangeType> mExplicitRangePtr;
const U* mMetaPtr;
@ -428,6 +433,15 @@ namespace MultiArrayTools
return For<SingleIndex<U,TYPE>,Expr,ForType::HIDDEN>(this, step, ex);
}
// Constructs and returns a PFor<SingleIndex<U,TYPE>,Expr> from a pointer to
// this index, the given step and the expression ex.
// NOTE(review): the PFor is given `this`; whether it keeps the pointer beyond
// construction is not visible here -- confirm lifetime expectations.
template <typename U, SpaceType TYPE>
template <class Expr>
auto SingleIndex<U,TYPE>::pifor(size_t step, Expr ex) const
-> PFor<SingleIndex<U,TYPE>,Expr>
{
//static const size_t LAYER = typename Expr::LAYER;
return PFor<SingleIndex<U,TYPE>,Expr>(this, step, ex);
}
/********************
* SingleRange *

View file

@ -492,6 +492,7 @@ namespace MultiArrayHelper
inline void PFor<IndexClass,Expr>::operator()(size_t mlast,
ExtType last) const
{
CHECK;
typedef typename IndexClass::RangeType RangeType;
int pos = 0;
size_t mnpos = 0;
@ -511,6 +512,7 @@ namespace MultiArrayHelper
template <class IndexClass, class Expr>
inline void PFor<IndexClass,Expr>::operator()(size_t mlast) const
{
CHECK;
typedef typename IndexClass::RangeType RangeType;
const ExtType last;
int pos = 0;

View file

@ -134,18 +134,20 @@ namespace {
typedef MultiRangeFactory<SR,SR,SR,SR,SR,SR,SR,SR> SR8F;
typedef SR8F::oType SR8;
static const size_t s = 65536*1000;
static const size_t os = 3000;
static const size_t s = 65536*os;
OpTest_Spin()
{
data.resize(s);
for(size_t i = 0; i != s; ++i){
double arg = static_cast<double>( i - s ) - 0.1;
data[i] = sin(arg)/arg;
data[i] = sin(arg);
//VCHECK(data[i]);
}
SRF f;
sr = std::dynamic_pointer_cast<SR>(f.create());
CRF cf(1000);
CRF cf(os);
cr = std::dynamic_pointer_cast<CR>(cf.create());
}
@ -164,23 +166,24 @@ namespace {
MultiArray<double,CR,SR,SR> res1( cr, sr, sr );
auto ii = MAT::getIndex<CR>(cr);
auto jj = MAT::getIndex<CR>(cr);
auto alpha = MAT::getIndex<SR>();
auto beta = MAT::getIndex<SR>();
auto gamma = MAT::getIndex<SR>();
auto delta = MAT::getIndex<SR>();
auto deltap = MAT::getIndex<SR>();
auto mix = MAT::mkMIndex( alpha, beta, gamma );
auto mix = MAT::mkMIndex( alpha, beta, gamma, jj );
std::clock_t begin = std::clock();
//for(size_t i = 0; i != 1000; ++i){
res1(ii ,delta, deltap) += ma(ii, delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
//for(size_t i = 0; i != os; ++i){
res1(ii ,delta, deltap).par() += ma(ii, delta, alpha, alpha, beta, beta, gamma, gamma, deltap).c(mix);
//}
std::clock_t end = std::clock();
std::cout << "MultiArray time: " << static_cast<double>( end - begin ) / CLOCKS_PER_SEC
<< std::endl;
std::vector<double> vres(4*4*1000);
std::vector<double> vres(4*4*os);
for(size_t d = 0; d != 4; ++d){
for(size_t p = 0; p != 4; ++p){
const size_t tidx = d*4 + p;
@ -188,20 +191,22 @@ namespace {
}
}
std::clock_t begin2 = std::clock();
for(size_t i = 0; i != 1000; ++i){
for(size_t a = 0; a != 4; ++a){
for(size_t b = 0; b != 4; ++b){
for(size_t c = 0; c != 4; ++c){
for(size_t d = 0; d != 4; ++d){
for(size_t p = 0; p != 4; ++p){
const size_t tidx = i*4*4 + d*4 + p;
const size_t sidx = i*65536 + d*4*4*4*4*4*4*4 + a*5*4*4*4*4*4 + b*5*4*4*4 + c*5*4 + p;
vres[tidx] += data[sidx];
}
}
}
}
}
for(size_t j = 0; j != os; ++j) {
for(size_t i = 0; i != os; ++i){
for(size_t a = 0; a != 4; ++a){
for(size_t b = 0; b != 4; ++b){
for(size_t c = 0; c != 4; ++c){
for(size_t d = 0; d != 4; ++d){
for(size_t p = 0; p != 4; ++p){
const size_t tidx = i*4*4 + d*4 + p;
const size_t sidx = i*65536 + d*4*4*4*4*4*4*4 + a*5*4*4*4*4*4 + b*5*4*4*4 + c*5*4 + p;
vres[tidx] += data[sidx];
}
}
}
}
}
}
}
std::clock_t end2 = std::clock();