From 53c433f8dfc9fdea49dca2879d5fc331a007f2d8 Mon Sep 17 00:00:00 2001 From: Christian Zimmermann Date: Thu, 14 Feb 2019 14:39:59 +0100 Subject: [PATCH] vectorizes, but not that optimal... --- src/include/arith.h | 6 ++++++ src/include/multi_array_operation.h | 6 +++--- src/include/type_operations.h | 27 +++++++++++++++++++++------ 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/src/include/arith.h b/src/include/arith.h index c3d94b7..5bf1a09 100644 --- a/src/include/arith.h +++ b/src/include/arith.h @@ -86,6 +86,12 @@ namespace MultiArrayTools { return a1 + a2; } + + static inline T& sapply(T& a1, T a2) + { + return a1 += a2; + } + }; template diff --git a/src/include/multi_array_operation.h b/src/include/multi_array_operation.h index a5a3dfe..31e997f 100644 --- a/src/include/multi_array_operation.h +++ b/src/include/multi_array_operation.h @@ -91,9 +91,9 @@ namespace MultiArrayTools template struct SelfIdentity { - static inline T apply(const T& a, const T& b) + static inline T& sapply(T& a, const T& b) { - return b; + return a = b; } }; @@ -140,7 +140,7 @@ namespace MultiArrayTools OperationMaster(T* data, const OpClass& second, IndexType& index, bool doParallel = false); - inline void set(size_t pos, T val) { mDataPtr[pos] = AOp::apply(mDataPtr[pos],val); } + inline void set(size_t pos, T val) { AOp::sapply(mDataPtr[pos],val); } //inline void add(size_t pos, T val) { mDataPtr[pos] += val; } inline T get(size_t pos) const; diff --git a/src/include/type_operations.h b/src/include/type_operations.h index 564b83b..8508fb5 100644 --- a/src/include/type_operations.h +++ b/src/include/type_operations.h @@ -154,25 +154,40 @@ namespace MultiArrayTools friend OperationClass; }; - typedef struct v256 { double _x[4]; } v256; + typedef struct v256 { alignas(32) double _x[4]; } v256; template inline void xadd(double* o, const double* a, const double* b) { - //#pragma omp simd aligned(o, a, b: 16) +#pragma omp simd aligned(o, a, b: 32) for(int i = 0; i < N; i++) { o[i] = a[i] + b[i]; } } + + template + inline void xsadd(double* o, const double* a) + { +#pragma omp simd aligned(o, a: 32) + for(int i = 0; i < N; i++) { + o[i] += a[i]; + } + } inline v256 operator+(const v256& a, const v256& b) { - alignas(32) v256 out; - xadd<4>( reinterpret_cast(&out), reinterpret_cast(&a), + alignas(32) v256 o; + xadd<4>( reinterpret_cast(&o), reinterpret_cast(&a), reinterpret_cast(&b) ); - return out; + return o; } - /* + + inline v256& operator+=(v256& o, const v256& a) + { + xsadd<4>( reinterpret_cast(&o), reinterpret_cast(&a) ); + return o; + } +/* inline v256 operator-(const v256& a, const v256& b) { alignas(32) v256 out;