Vectorizes, but not yet optimally...

This commit is contained in:
Christian Zimmermann 2019-02-14 14:39:59 +01:00
parent 6d776f853d
commit 53c433f8df
3 changed files with 30 additions and 9 deletions

View file

@ -86,6 +86,12 @@ namespace MultiArrayTools
{ {
return a1 + a2; return a1 + a2;
} }
// In-place ("self") application: accumulates a2 onto a1 through T's
// operator+= and hands back the reference that operator yields.
//   a1  operand that is modified in place
//   a2  value added onto a1 (taken by value)
static inline T& sapply(T& a1, T a2)
{
    T& accumulated = (a1 += a2);
    return accumulated;
}
}; };
template <typename T> template <typename T>

View file

@ -91,9 +91,9 @@ namespace MultiArrayTools
template <typename T> template <typename T>
struct SelfIdentity struct SelfIdentity
{ {
static inline T apply(const T& a, const T& b) static inline T& sapply(T& a, const T& b)
{ {
return b; return a = b;
} }
}; };
@ -140,7 +140,7 @@ namespace MultiArrayTools
OperationMaster(T* data, const OpClass& second, OperationMaster(T* data, const OpClass& second,
IndexType& index, bool doParallel = false); IndexType& index, bool doParallel = false);
inline void set(size_t pos, T val) { mDataPtr[pos] = AOp::apply(mDataPtr[pos],val); } inline void set(size_t pos, T val) { AOp::sapply(mDataPtr[pos],val); }
//inline void add(size_t pos, T val) { mDataPtr[pos] += val; } //inline void add(size_t pos, T val) { mDataPtr[pos] += val; }
inline T get(size_t pos) const; inline T get(size_t pos) const;

View file

@ -154,23 +154,38 @@ namespace MultiArrayTools
friend OperationClass; friend OperationClass;
}; };
typedef struct v256 { double _x[4]; } v256; typedef struct v256 { alignas(32) double _x[4]; } v256;
template <int N> template <int N>
inline void xadd(double* o, const double* a, const double* b) inline void xadd(double* o, const double* a, const double* b)
{ {
//#pragma omp simd aligned(o, a, b: 16) #pragma omp simd aligned(o, a, b: 32)
for(int i = 0; i < N; i++) { for(int i = 0; i < N; i++) {
o[i] = a[i] + b[i]; o[i] = a[i] + b[i];
} }
} }
// In-place element-wise sum over N doubles: o[k] becomes o[k] + a[k]
// for every k in [0, N).
//   o  destination, read and overwritten
//   a  addend, read only
// The `aligned(o, a: 32)` clause asserts to an OpenMP-enabled compiler that
// both pointers are 32-byte aligned, so callers must pass storage that
// satisfies this. Without OpenMP support the pragma is ignored and the
// loop runs as plain scalar code.
template <int N>
inline void xsadd(double* o, const double* a)
{
#pragma omp simd aligned(o, a: 32)
    for(int k = 0; k < N; ++k) {
        o[k] = o[k] + a[k];
    }
}
inline v256 operator+(const v256& a, const v256& b) inline v256 operator+(const v256& a, const v256& b)
{ {
alignas(32) v256 out; alignas(32) v256 o;
xadd<4>( reinterpret_cast<double*>(&out), reinterpret_cast<const double*>(&a), xadd<4>( reinterpret_cast<double*>(&o), reinterpret_cast<const double*>(&a),
reinterpret_cast<const double*>(&b) ); reinterpret_cast<const double*>(&b) );
return out; return o;
}
inline v256& operator+=(v256& o, const v256& a)
{
xsadd<4>( reinterpret_cast<double*>(&o), reinterpret_cast<const double*>(&a) );
return o;
} }
/* /*
inline v256 operator-(const v256& a, const v256& b) inline v256 operator-(const v256& a, const v256& b)