Vectorizes, but not yet optimally...

This commit is contained in:
Christian Zimmermann 2019-02-14 14:39:59 +01:00
parent 6d776f853d
commit 53c433f8df
3 changed files with 30 additions and 9 deletions

View file

@ -86,6 +86,12 @@ namespace MultiArrayTools
{
return a1 + a2;
}
// In-place variant of the operation: accumulates a2 into a1 via `+=`
// and returns a reference to the updated a1.
static inline T& sapply(T& a1, T a2)
{
    a1 += a2;
    return a1;
}
};
template <typename T>

View file

@ -91,9 +91,9 @@ namespace MultiArrayTools
// Assignment-style operation: the result is always the right-hand operand.
// Exposes both a value-returning form (apply) and an in-place form (sapply),
// mirroring the apply/sapply pair offered by the neighbouring operation struct.
template <typename T>
struct SelfIdentity
{
    // Value form: `a` is ignored, a copy of `b` is returned.
    static inline T apply(const T& a, const T& b)
    {
        return b;
    }

    // In-place form: assigns `b` to `a` and returns a reference to `a`.
    static inline T& sapply(T& a, const T& b)
    {
        return a = b;
    }
};
@ -140,7 +140,7 @@ namespace MultiArrayTools
// Constructor declaration (the definition is not in this part of the file).
// Takes a raw data pointer, the right-hand operation `second`, an index
// object, and an optional `doParallel` flag that defaults to false.
OperationMaster(T* data, const OpClass& second,
IndexType& index, bool doParallel = false);
// Combines `val` into the element at `pos` by handing that element to
// AOp::sapply as a mutable reference, so the update happens in place
// instead of via a computed value that is assigned back afterwards.
inline void set(size_t pos, T val) { AOp::sapply(mDataPtr[pos],val); }
//inline void add(size_t pos, T val) { mDataPtr[pos] += val; }
// Const accessor taking a position and returning a T by value.
// NOTE(review): presumably reads mDataPtr[pos]; the definition is not visible here — confirm.
inline T get(size_t pos) const;

View file

@ -154,23 +154,38 @@ namespace MultiArrayTools
friend OperationClass;
};
// Pack of four doubles (256 bits). The array member is 32-byte aligned, which
// makes every v256 object 32-byte aligned as well; the `aligned(...: 32)`
// clauses on the SIMD loops in this file rely on that alignment.
typedef struct v256 { alignas(32) double _x[4]; } v256;
// Element-wise sum of two length-N double arrays: o[k] = a[k] + b[k].
// The `aligned` clause promises an OpenMP compiler that o, a and b are all
// 32-byte aligned (a 16-byte variant of the pragma was tried before), so
// callers must only pass suitably aligned storage.
template <int N>
inline void xadd(double* o, const double* a, const double* b)
{
#pragma omp simd aligned(o, a, b: 32)
    for (int k = 0; k < N; ++k) {
        const double sum = a[k] + b[k];
        o[k] = sum;
    }
}
// In-place element-wise accumulation over length-N double arrays:
// o[k] += a[k]. As declared by the `aligned` clause, both pointers are
// expected to be 32-byte aligned.
template <int N>
inline void xsadd(double* o, const double* a)
{
#pragma omp simd aligned(o, a: 32)
    for (int k = 0; k < N; ++k) {
        o[k] += a[k];
    }
}
// Lane-wise sum of two v256 packs, returned by value.
// The local result is explicitly 32-byte aligned because xadd<4> declares
// all three of its pointers as 32-byte aligned; `a` and `b` must therefore
// refer to 32-byte aligned objects as well.
inline v256 operator+(const v256& a, const v256& b)
{
    alignas(32) v256 o;
    // The _x arrays decay to the double pointers xadd expects, so no
    // reinterpret_cast is needed. xadd<4> writes all four lanes of `o`.
    xadd<4>( o._x, a._x, b._x );
    return o;
}
// Lane-wise in-place accumulation: adds the four doubles of `a` onto those
// of `o` through xsadd<4> and returns `o` so that calls can be chained.
inline v256& operator+=(v256& o, const v256& a)
{
    double* const dst = o._x;
    const double* const src = a._x;
    xsadd<4>( dst, src );
    return o;
}
/*
inline v256 operator-(const v256& a, const v256& b)