From e04d9aa5bc8198be9af03ff16332763e3b242670 Mon Sep 17 00:00:00 2001
From: Christian Zimmermann
Date: Mon, 14 Jan 2019 18:39:09 +0100
Subject: [PATCH] parallel for

---
Review notes:
 * Builds with -fopenmp and adds a PFor expression, obtained through
   For::parallel(), whose operator() runs the index loop inside
   "#pragma omp parallel" / "#pragma omp for nowait".
 * mnpos and npos are assigned in every loop iteration and then handed to
   expr(), so they are listed in private() instead of shared(); with
   shared() the threads would overwrite each other's values between the
   assignment and the call. expr itself stays shared.
 * NOTE(review): in this copy of the patch the text between angle brackets
   (include target, template parameter/argument lists, cast target types,
   author address) is missing, and indentation / blank lines were re-created.
   Restore them from the original commit before applying.

 CMakeLists.txt          |   2 +-
 src/include/xfor/xfor.h | 158 ++++++++++++++++++++++++++++++++++++++++
 2 files changed, 159 insertions(+), 1 deletion(-)

diff --git a/CMakeLists.txt b/CMakeLists.txt
index df290d9..ba65885 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 2.8)
 
 project(multi_array)
 
-set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -std=c++11 -Wpedantic -O3 -g -march=native")
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -std=c++11 -Wpedantic -O3 -g -march=native -fopenmp")
 
 enable_testing()
 
diff --git a/src/include/xfor/xfor.h b/src/include/xfor/xfor.h
index 727a263..dfeb3d6 100644
--- a/src/include/xfor/xfor.h
+++ b/src/include/xfor/xfor.h
@@ -9,6 +9,8 @@
 #include "xfor/for_utils.h"
 #include "xfor/exttype.h"
 
+#include
+
 #define VCHECK(a) std::cout << __FILE__ << ": @" << __LINE__ \
     << " in " << __func__ << ": " << #a << " = " << a << std::endl;
 
@@ -184,6 +186,9 @@ namespace MultiArrayHelper
         auto extension() const -> ExtType;
     };
 
+    template
+    class PFor;
+
     template
     class For : public ExpressionBase
     {
@@ -222,6 +227,54 @@ namespace MultiArrayHelper
         inline void operator()(size_t mlast, ExtType last) const;
         inline void operator()(size_t mlast = 0) const override final;
 
+        PFor parallel() const; // builds a PFor from this loop's index pointer, step and expression
+
+        DExt dRootSteps(std::intptr_t iPtrNum = 0) const override final;
+        DExt dExtension() const override final;
+
+        auto rootSteps(std::intptr_t iPtrNum = 0) const -> ExtType;
+        auto extension() const -> ExtType;
+
+    };
+
+    template
+    class PFor : public ExpressionBase // loop expression whose operator() iterates under OpenMP
+    {
+    private:
+        PFor() = default;
+
+        const IndexClass* mIndPtr; // not owned; asserted non-null in the constructors
+        size_t mSPos; // mIndPtr->pos() at construction
+        size_t mMax; // mIndPtr->max() at construction; passed to ForBound for the loop bound
+        size_t mStep;
+
+        Expr mExpr; // expression invoked once per loop position
+        typedef decltype(mExpr.rootSteps()) ExtType;
+        ExtType mExt; // mExpr.rootSteps() evaluated for mIndPtr; multiplied by pos in operator()
+
+        mutable ExtType mRootSteps; // storage behind the pointer returned by dRootSteps()
+
+    public:
+        typedef ExpressionBase EB;
+
+        static constexpr size_t LAYER = Expr::LAYER + 1;
+        static constexpr size_t SIZE = Expr::SIZE;
+
+        PFor(const PFor& in) = default;
+        PFor& operator=(const PFor& in) = default;
+        PFor(PFor&& in) = default;
+        PFor& operator=(PFor&& in) = default;
+
+        PFor(const std::shared_ptr& indPtr,
+             size_t step, Expr expr);
+
+        PFor(const IndexClass* indPtr,
+             size_t step, Expr expr);
+
+        inline void operator()(size_t mlast, DExt last) const override final;
+        inline void operator()(size_t mlast, ExtType last) const;
+        inline void operator()(size_t mlast = 0) const override final;
+
         DExt dRootSteps(std::intptr_t iPtrNum = 0) const override final;
         DExt dExtension() const override final;
 
@@ -400,6 +453,111 @@ namespace MultiArrayHelper
                               sizeof(ExtType)/sizeof(size_t));
     }
 
+    template
+    PFor For::parallel() const
+    {
+        static_assert(FT == ForType::DEFAULT, "hidden for not parallelizable");
+        return PFor(mIndPtr, mStep, mExpr);
+    }
+
+    /******************
+     *   P F o r      *
+     ******************/
+
+    template
+    PFor::PFor(const std::shared_ptr& indPtr,
+               size_t step, Expr expr) :
+        mIndPtr(indPtr.get()), mSPos(mIndPtr->pos()), mMax(mIndPtr->max()), mStep(step),
+        mExpr(expr), mExt(mExpr.rootSteps( reinterpret_cast( mIndPtr )))
+    {
+        assert(mIndPtr != nullptr); // NOTE(review): runs after mIndPtr was already dereferenced in the initializer list
+    }
+
+    template
+    PFor::PFor(const IndexClass* indPtr,
+               size_t step, Expr expr) :
+        mIndPtr(indPtr), mSPos(mIndPtr->pos()), mMax(mIndPtr->max()), mStep(step),
+        mExpr(expr), mExt(mExpr.rootSteps( reinterpret_cast( mIndPtr )))
+    {
+        assert(mIndPtr != nullptr); // NOTE(review): runs after mIndPtr was already dereferenced in the initializer list
+    }
+
+    template
+    inline void PFor::operator()(size_t mlast, DExt last) const
+    {
+        operator()(mlast, *reinterpret_cast(last.first)); // forwards to the ExtType overload
+    }
+
+    template
+    inline void PFor::operator()(size_t mlast,
+                                 ExtType last) const
+    {
+        typedef typename IndexClass::RangeType RangeType;
+        int pos = 0;
+        size_t mnpos = 0; // assigned in every iteration -> private to each thread
+        ExtType npos; // assigned in every iteration -> private to each thread
+        auto expr = mExpr; // local copy of the loop body; shared by all threads
+#pragma omp parallel shared(expr) private(pos,mnpos,npos)
+        {
+#pragma omp for nowait
+            for(pos = 0; pos < static_cast(ForBound::template bound(mMax)); pos++){
+                mnpos = PosForward::valuex(mlast, mStep, pos);
+                npos = last + mExt*static_cast(pos);
+                expr(mnpos, npos);
+            }
+        }
+    }
+
+    template
+    inline void PFor::operator()(size_t mlast) const
+    {
+        typedef typename IndexClass::RangeType RangeType;
+        const ExtType last; // default-constructed starting extension
+        int pos = 0;
+        size_t mnpos = 0; // assigned in every iteration -> private to each thread
+        ExtType npos; // assigned in every iteration -> private to each thread
+        auto expr = mExpr; // local copy of the loop body; shared by all threads
+#pragma omp parallel shared(expr) private(pos,mnpos,npos)
+        {
+#pragma omp for nowait
+            for(pos = 0; pos < static_cast(ForBound::template bound(mMax)); pos++){
+                mnpos = PosForward::valuex(mlast, mStep, pos);
+                npos = last + mExt*static_cast(pos);
+                expr(mnpos, npos);
+            }
+        }
+    }
+
+
+    template
+    auto PFor::rootSteps(std::intptr_t iPtrNum) const
+        -> ExtType
+    {
+        return mExpr.rootSteps(iPtrNum);
+    }
+
+    template
+    auto PFor::extension() const
+        -> ExtType
+    {
+        return mExt;
+    }
+
+    template
+    DExt PFor::dRootSteps(std::intptr_t iPtrNum) const
+    {
+        mRootSteps = rootSteps(iPtrNum); // cached in the mutable member so the returned pointer stays valid
+        return std::make_pair(reinterpret_cast(&mRootSteps),
+                              sizeof(ExtType)/sizeof(size_t));
+    }
+
+    template
+    DExt PFor::dExtension() const
+    {
+        return std::make_pair(reinterpret_cast(&mExt),
+                              sizeof(ExtType)/sizeof(size_t));
+    }
+
     /************************
      *   SingleExpression   *
      ************************/