extensions: finish basic arithmetic implementation for consecutive + avx

This commit is contained in:
Christian Zimmermann 2022-10-30 12:12:53 +01:00
parent fbfd84f421
commit c9f69ad25d
4 changed files with 401 additions and 63 deletions

View file

@ -6,6 +6,10 @@
namespace CNORXZ namespace CNORXZ
{ {
/***********************
* PlusCC / PlusCX *
***********************/
constexpr decltype(auto) PlusCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a, constexpr decltype(auto) PlusCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
const Consecutive<Double,ND>& b) const Consecutive<Double,ND>& b)
{ {
@ -62,6 +66,187 @@ namespace CNORXZ
return o; return o;
} }
/*************************
* MinusCC / MinusCX *
*************************/
/** Lane-wise difference a - b of two AVX-backed Consecutive objects.

    Both operands are read as one 256-bit packed-double register each and
    the packed difference is written to a fresh object.
    NOTE(review): _mm256_load_pd/_mm256_store_pd require 32-byte aligned
    addresses; presumably Consecutive<Double,ND>::mD is aligned that way -- confirm.

    @param a Minuend.
    @param b Subtrahend.
    @return A new Consecutive<Double,ND> (by value) holding the difference.
 */
constexpr decltype(auto) MinusCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
                                                         const Consecutive<Double,ND>& b)
{
    Consecutive<Double,ND> diff;
    const __m256d lhs = _mm256_load_pd(a.mD);
    const __m256d rhs = _mm256_load_pd(b.mD);
    _mm256_store_pd(diff.mD, _mm256_sub_pd(lhs, rhs));
    return diff;
}
/** In-place lane-wise subtraction: a -= b.

    NOTE(review): the aligned load/store intrinsics need 32-byte aligned
    mD storage -- confirm against the Consecutive definition.

    @param a Left operand; overwritten with the difference.
    @param b Subtrahend.
    @return Reference to a (decltype(auto) on a reference parameter).
 */
constexpr decltype(auto) MinusCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
                                                          const Consecutive<Double,ND>& b)
{
    const __m256d rhs = _mm256_load_pd(b.mD);
    // a serves as both source and destination
    _mm256_store_pd(a.mD, _mm256_sub_pd(_mm256_load_pd(a.mD), rhs));
    return a;
}
/** Subtract a scalar from every lane of an AVX-backed Consecutive.

    The scalar is converted with static_cast<Double> and broadcast to all
    lanes of a 256-bit register before the packed subtraction.
    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Minuend vector.
    @param b Scalar subtrahend; must be static_cast-convertible to Double.
    @return A new Consecutive<Double,ND> (by value) holding a - b per lane.
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
MinusCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
{
    Consecutive<Double,ND> o;
    const __m256d av = _mm256_load_pd(a.mD);
    const __m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
    const __m256d ov = _mm256_sub_pd(av, bv);
    _mm256_store_pd(o.mD, ov);
    return o;
}
/** In-place scalar subtraction: every lane of a is reduced by b.

    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Vector operand; overwritten with the result.
    @param b Scalar subtrahend; converted with static_cast<Double>.
    @return Reference to a.
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
MinusCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
{
    const __m256d av = _mm256_load_pd(a.mD);
    const __m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
    const __m256d ov = _mm256_sub_pd(av, bv);
    _mm256_store_pd(a.mD, ov);
    return a;
}
/** Subtract every lane of an AVX-backed Consecutive from a scalar.

    The scalar is broadcast to all lanes and acts as the minuend, so the
    result is a - b[i] (operand order matters for subtraction).
    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Scalar minuend; converted with static_cast<Double>.
    @param b Subtrahend vector.
    @return A new Consecutive<Double,ND> (by value).
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
MinusCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
{
    Consecutive<Double,ND> o;
    const __m256d av = _mm256_set1_pd( static_cast<Double>(a) );
    const __m256d bv = _mm256_load_pd(b.mD);
    const __m256d ov = _mm256_sub_pd(av, bv);
    _mm256_store_pd(o.mD, ov);
    return o;
}
/***********************************
* MultipliesCC / MultipliesCX *
***********************************/
/** Lane-wise product a * b of two AVX-backed Consecutive objects.

    NOTE(review): _mm256_load_pd/_mm256_store_pd require 32-byte aligned
    addresses; presumably mD is aligned that way -- confirm.

    @param a First factor.
    @param b Second factor.
    @return A new Consecutive<Double,ND> (by value) holding the product.
 */
constexpr decltype(auto) MultipliesCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
                                                              const Consecutive<Double,ND>& b)
{
    Consecutive<Double,ND> prod;
    const __m256d lhs = _mm256_load_pd(a.mD);
    const __m256d rhs = _mm256_load_pd(b.mD);
    _mm256_store_pd(prod.mD, _mm256_mul_pd(lhs, rhs));
    return prod;
}
/** In-place lane-wise multiplication: a *= b.

    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Left factor; overwritten with the product.
    @param b Right factor.
    @return Reference to a.
 */
constexpr decltype(auto) MultipliesCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
                                                               const Consecutive<Double,ND>& b)
{
    const __m256d rhs = _mm256_load_pd(b.mD);
    // a serves as both source and destination
    _mm256_store_pd(a.mD, _mm256_mul_pd(_mm256_load_pd(a.mD), rhs));
    return a;
}
/** Multiply every lane of an AVX-backed Consecutive by a scalar.

    The scalar is converted with static_cast<Double> and broadcast to all
    lanes before the packed multiplication.
    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Vector factor.
    @param b Scalar factor; must be static_cast-convertible to Double.
    @return A new Consecutive<Double,ND> (by value).
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
MultipliesCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
{
    Consecutive<Double,ND> o;
    const __m256d av = _mm256_load_pd(a.mD);
    const __m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
    const __m256d ov = _mm256_mul_pd(av, bv);
    _mm256_store_pd(o.mD, ov);
    return o;
}
/** In-place scalar multiplication: every lane of a is scaled by b.

    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Vector operand; overwritten with the result.
    @param b Scalar factor; converted with static_cast<Double>.
    @return Reference to a.
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
MultipliesCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
{
    const __m256d av = _mm256_load_pd(a.mD);
    const __m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
    const __m256d ov = _mm256_mul_pd(av, bv);
    _mm256_store_pd(a.mD, ov);
    return a;
}
/** Multiply a scalar (left operand) with every lane of a Consecutive.

    The scalar is broadcast to all lanes and used as the first operand of
    the packed multiplication.
    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Scalar factor; converted with static_cast<Double>.
    @param b Vector factor.
    @return A new Consecutive<Double,ND> (by value).
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
MultipliesCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
{
    Consecutive<Double,ND> o;
    const __m256d av = _mm256_set1_pd( static_cast<Double>(a) );
    const __m256d bv = _mm256_load_pd(b.mD);
    const __m256d ov = _mm256_mul_pd(av, bv);
    _mm256_store_pd(o.mD, ov);
    return o;
}
/*****************************
* DividesCC / DividesCX *
*****************************/
/** Lane-wise quotient a / b of two AVX-backed Consecutive objects.

    NOTE(review): _mm256_load_pd/_mm256_store_pd require 32-byte aligned
    addresses; presumably mD is aligned that way -- confirm.

    @param a Dividend.
    @param b Divisor.
    @return A new Consecutive<Double,ND> (by value) holding the quotient.
 */
constexpr decltype(auto) DividesCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
                                                           const Consecutive<Double,ND>& b)
{
    Consecutive<Double,ND> quot;
    const __m256d num = _mm256_load_pd(a.mD);
    const __m256d den = _mm256_load_pd(b.mD);
    _mm256_store_pd(quot.mD, _mm256_div_pd(num, den));
    return quot;
}
/** In-place lane-wise division: a /= b.

    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Dividend; overwritten with the quotient.
    @param b Divisor.
    @return Reference to a.
 */
constexpr decltype(auto) DividesCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
                                                            const Consecutive<Double,ND>& b)
{
    const __m256d den = _mm256_load_pd(b.mD);
    // a serves as both source and destination
    _mm256_store_pd(a.mD, _mm256_div_pd(_mm256_load_pd(a.mD), den));
    return a;
}
/** Divide every lane of an AVX-backed Consecutive by a scalar.

    The scalar is converted with static_cast<Double> and broadcast to all
    lanes before the packed division.
    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Dividend vector.
    @param b Scalar divisor; must be static_cast-convertible to Double.
    @return A new Consecutive<Double,ND> (by value).
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
DividesCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
{
    Consecutive<Double,ND> o;
    const __m256d av = _mm256_load_pd(a.mD);
    const __m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
    const __m256d ov = _mm256_div_pd(av, bv);
    _mm256_store_pd(o.mD, ov);
    return o;
}
/** In-place scalar division: every lane of a is divided by b.

    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Vector operand; overwritten with the result.
    @param b Scalar divisor; converted with static_cast<Double>.
    @return Reference to a.
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
DividesCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
{
    const __m256d av = _mm256_load_pd(a.mD);
    const __m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
    const __m256d ov = _mm256_div_pd(av, bv);
    _mm256_store_pd(a.mD, ov);
    return a;
}
/** Divide a scalar by every lane of an AVX-backed Consecutive.

    The scalar is broadcast to all lanes and acts as the dividend, so the
    result is a / b[i] (operand order matters for division).
    NOTE(review): aligned load/store require 32-byte aligned mD -- confirm.

    @param a Scalar dividend; converted with static_cast<Double>.
    @param b Divisor vector.
    @return A new Consecutive<Double,ND> (by value).
 */
// `static` may only appear on the in-class declaration; repeating it on the
// out-of-class definition is ill-formed, so it is omitted here.
template <typename X>
constexpr decltype(auto)
DividesCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
{
    Consecutive<Double,ND> o;
    const __m256d av = _mm256_set1_pd( static_cast<Double>(a) );
    const __m256d bv = _mm256_load_pd(b.mD);
    const __m256d ov = _mm256_div_pd(av, bv);
    _mm256_store_pd(o.mD, ov);
    return o;
}
} }
#endif #endif

View file

@ -37,6 +37,75 @@ namespace CNORXZ
static constexpr decltype(auto) static constexpr decltype(auto)
eval(const X& a, const Consecutive<Double,ND>& b); eval(const X& a, const Consecutive<Double,ND>& b);
}; };
// Full specialization of MinusCC for Double operands of length ND.
// Declarations only; the out-of-line definitions use 256-bit AVX intrinsics.
template <>
struct MinusCC<Double,Double,ND>
{
// Returns a new object holding the lane-wise difference a - b.
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
// Subtracts b from a in place and returns a.
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
};
// Partial specialization of MinusCX for a Double vector of length ND combined
// with a scalar of arbitrary type X. Declarations only; the out-of-line
// definitions convert the scalar with static_cast<Double>.
template <typename X>
struct MinusCX<Double,X,ND>
{
// vector - scalar, returned as a new object.
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const X& b);
// vector -= scalar, in place; returns a.
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const X& b);
// scalar - vector, returned as a new object.
static constexpr decltype(auto)
eval(const X& a, const Consecutive<Double,ND>& b);
};
// Full specialization of MultipliesCC for Double operands of length ND.
// Declarations only; the out-of-line definitions use 256-bit AVX intrinsics.
template <>
struct MultipliesCC<Double,Double,ND>
{
// Returns a new object holding the lane-wise product a * b.
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
// Multiplies a by b in place and returns a.
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
};
// Partial specialization of MultipliesCX for a Double vector of length ND
// combined with a scalar of arbitrary type X. Declarations only; the
// out-of-line definitions convert the scalar with static_cast<Double>.
template <typename X>
struct MultipliesCX<Double,X,ND>
{
// vector * scalar, returned as a new object.
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const X& b);
// vector *= scalar, in place; returns a.
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const X& b);
// scalar * vector, returned as a new object.
static constexpr decltype(auto)
eval(const X& a, const Consecutive<Double,ND>& b);
};
// Full specialization of DividesCC for Double operands of length ND.
// Declarations only; the out-of-line definitions use 256-bit AVX intrinsics.
template <>
struct DividesCC<Double,Double,ND>
{
// Returns a new object holding the lane-wise quotient a / b.
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
// Divides a by b in place and returns a.
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
};
// Partial specialization of DividesCX for a Double vector of length ND
// combined with a scalar of arbitrary type X. Declarations only; the
// out-of-line definitions convert the scalar with static_cast<Double>.
template <typename X>
struct DividesCX<Double,X,ND>
{
// vector / scalar, returned as a new object.
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const X& b);
// vector /= scalar, in place; returns a.
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const X& b);
// scalar / vector, returned as a new object.
static constexpr decltype(auto)
eval(const X& a, const Consecutive<Double,ND>& b);
};
} }
#endif #endif

View file

@ -150,31 +150,31 @@ namespace CNORXZ
*******************************/ *******************************/
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const Consecutive<U,N>& b) constexpr decltype(auto) MinusCC<T,U,N>::eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b );
}
// Generic (non-specialized) vector - scalar: forwards a binary "minus"
// functor together with both operands to consecFunc and returns its result.
template <typename T, typename X, SizeT N>
constexpr decltype(auto) MinusCX<T,X,N>::eval(const Consecutive<T,N>& a, const X& b)
{
    return consecFunc(
        [](const auto& lhs, const auto& rhs) { return lhs - rhs; },
        a, b );
}
template <typename T, typename X, SizeT N>
constexpr decltype(auto) MinusCX<T,X,N>::eval(const X& a, const Consecutive<T,N>& b)
{ {
return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b ); return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b );
} }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const U& b) constexpr Consecutive<T,N>& MinusCC<T,U,N>::aeval(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const T& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ {
return consecFuncA( [](auto& x, const auto& y) { return x -= y; }, a, b ); return consecFuncA( [](auto& x, const auto& y) { return x -= y; }, a, b );
} }
template <typename T, typename U, SizeT N> template <typename T, typename X, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const U& a) constexpr Consecutive<T,N>& MinusCX<T,X,N>::aeval(Consecutive<T,N>& o, const X& a)
{ {
return consecFuncA( [](auto& x, const auto& y) { return x -= y; }, a, b ); return consecFuncA( [](auto& x, const auto& y) { return x -= y; }, a, b );
} }
@ -184,31 +184,31 @@ namespace CNORXZ
***********************************/ ***********************************/
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const Consecutive<U,N>& b) constexpr decltype(auto) MultipliesCC<T,U,N>::eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
// Generic (non-specialized) vector * scalar: forwards a binary "multiplies"
// functor together with both operands to consecFunc and returns its result.
// The scalar type is the template parameter X; the former spelling `U` named
// a parameter that this template does not declare.
template <typename T, typename X, SizeT N>
constexpr decltype(auto) MultipliesCX<T,X,N>::eval(const Consecutive<T,N>& a, const X& b)
{
    return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
template <typename T, typename X, SizeT N>
constexpr decltype(auto) MultipliesCX<T,X,N>::eval(const X& a, const Consecutive<T,N>& b)
{ {
return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b ); return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b );
} }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const U& b) constexpr Consecutive<T,N>& MultipliesCC<T,U,N>::aeval(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const T& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ {
return consecFuncA( [](const auto& x, const auto& y) { return x *= y; }, a, b ); return consecFuncA( [](const auto& x, const auto& y) { return x *= y; }, a, b );
} }
template <typename T, typename U, SizeT N> template <typename T, typename X, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const U& a) constexpr Consecutive<T,N>& MultipliesCX<T,X,N>::eval(Consecutive<T,N>& o, const X& a)
{ {
return consecFuncA( [](const auto& x, const auto& y) { return x *= y; }, a, b ); return consecFuncA( [](const auto& x, const auto& y) { return x *= y; }, a, b );
} }
@ -218,31 +218,31 @@ namespace CNORXZ
*********************************/ *********************************/
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const Consecutive<U,N>& b) constexpr decltype(auto) DividesCC<T,U,N>::eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b );
}
// Generic (non-specialized) vector / scalar: forwards a binary "divides"
// functor together with both operands to consecFunc and returns its result.
template <typename T, typename X, SizeT N>
constexpr decltype(auto) DividesCX<T,X,N>::eval(const Consecutive<T,N>& a, const X& b)
{
    return consecFunc(
        [](const auto& lhs, const auto& rhs) { return lhs / rhs; },
        a, b );
}
template <typename T, typename X, SizeT N>
constexpr decltype(auto) DividesCX<T,X,N>::eval(const X& a, const Consecutive<T,N>& b)
{ {
return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b ); return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b );
} }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const U& b) constexpr Consecutive<T,N>& DividesCC<T,U,N>::aeval(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const T& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ {
return consecFuncA( [](const auto& x, const auto& y) { return x /= y; }, a, b ); return consecFuncA( [](const auto& x, const auto& y) { return x /= y; }, a, b );
} }
template <typename T, typename U, SizeT N> template <typename T, typename X, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const U& a) constexpr Consecutive<T,N>& DividesCX<T,X,N>::eval(Consecutive<T,N>& o, const X& a)
{ {
return consecFuncA( [](const auto& x, const auto& y) { return x /= y; }, a, b ); return consecFuncA( [](const auto& x, const auto& y) { return x /= y; }, a, b );
} }

View file

@ -128,57 +128,141 @@ namespace CNORXZ
*******************************/ *******************************/
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const Consecutive<U,N>& b); struct MinusCC
{
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const Consecutive<U,N>& b);
};
// Primary template providing the "Consecutive minus scalar" operations.
// T: element type of the Consecutive, X: scalar type, N: number of elements.
// The operator- / operator-= overloads for scalar operands dispatch to these
// static members, which allows specializing the implementation per type.
template <typename T, typename X, SizeT N>
struct MinusCX
{
// vector - scalar
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const X& b);
// vector -= scalar; a is the mutable target
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const X& b);
// scalar - vector
static constexpr decltype(auto)
eval(const X& a, const Consecutive<T,N>& b);
};
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const U& b); constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{ return MinusCC<T,U,N>::eval(a,b); }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const T& a, const Consecutive<U,N>& b); constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const U& b)
{ return MinusCX<T,U,N>::eval(a,b); }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<U,N>& a); constexpr decltype(auto) operator-(const T& a, const Consecutive<U,N>& b)
{ return MinusCX<U,T,N>::eval(a,b); }
// Compound subtraction of two Consecutive objects (new side of the diff).
// Dispatches to the in-place variant aeval with the actual parameters; the
// former body called eval(a,b), where `b` is not declared in this scope and
// eval would not modify the target o.
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ return MinusCC<T,U,N>::aeval(o,a); }
// Compound subtraction of a scalar from a Consecutive.
// Dispatches to the in-place variant aeval with the actual parameters; the
// former body called eval(a,b), where `b` is not declared in this scope and
// eval would not modify the target o.
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const U& a)
{ return MinusCX<T,U,N>::aeval(o,a); }
/*********************************** /***********************************
* basic operations: multiplies * * basic operations: multiplies *
***********************************/ ***********************************/
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const Consecutive<U,N>& b); struct MultipliesCC
{
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const Consecutive<U,N>& b);
};
// Primary template providing the "Consecutive times scalar" operations.
// T: element type of the Consecutive, X: scalar type, N: number of elements.
// The operator* / operator*= overloads for scalar operands are meant to
// dispatch to these static members.
template <typename T, typename X, SizeT N>
struct MultipliesCX
{
// vector * scalar
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const X& b);
// vector *= scalar; a is the mutable target
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const X& b);
// scalar * vector
static constexpr decltype(auto)
eval(const X& a, const Consecutive<T,N>& b);
};
// Product of two Consecutive objects (new side of the diff).
// Dispatches to MultipliesCC; the former spelling `MulitpliesCC` named a
// template that is not declared anywhere.
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{ return MultipliesCC<T,U,N>::eval(a,b); }
// Product of a Consecutive (left) and a scalar (right) (new side of the diff).
// Dispatches to MultipliesCX; the former spelling `MulitpliesCX` named a
// template that is not declared anywhere.
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const U& b)
{ return MultipliesCX<T,U,N>::eval(a,b); }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<U,N>& a); constexpr decltype(auto) operator*(const T& a, const Consecutive<U,N>& b)
{ return MulitpliesXC<U,T,N>::eval(a,b); }
// Compound multiplication of two Consecutive objects (new side of the diff).
// Dispatches to the in-place variant MultipliesCC::aeval with the actual
// parameters; the former body used the misspelled `MulitpliesCC` and called
// eval(a,b), where `b` is not declared in this scope.
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ return MultipliesCC<T,U,N>::aeval(o,a); }
// Compound multiplication of a Consecutive by a scalar.
// Dispatches to the in-place variant MultipliesCX::aeval with the actual
// parameters; the former body used the misspelled `MulitpliesCX` and called
// eval(a,b), where `b` is not declared in this scope.
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const U& a)
{ return MultipliesCX<T,U,N>::aeval(o,a); }
/********************************* /*********************************
* basic operations: divides * * basic operations: divides *
*********************************/ *********************************/
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const Consecutive<U,N>& b); struct DividesCC
{
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const Consecutive<U,N>& b);
};
// Primary template providing the "Consecutive divided by scalar" operations.
// T: element type of the Consecutive, X: scalar type, N: number of elements.
// The operator/ / operator/= overloads for scalar operands dispatch to these
// static members, which allows specializing the implementation per type.
template <typename T, typename X, SizeT N>
struct DividesCX
{
// vector / scalar
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const X& b);
// vector /= scalar; a is the mutable target
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const X& b);
// scalar / vector
static constexpr decltype(auto)
eval(const X& a, const Consecutive<T,N>& b);
};
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const U& b); constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{ return DividesCC<T,U,N>::eval(a,b); }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const T& a, const Consecutive<U,N>& b); constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const U& b)
{ return DividesCX<T,U,N>::eval(a,b); }
template <typename T, typename U, SizeT N> template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<U,N>& a); constexpr decltype(auto) operator/(const T& a, const Consecutive<U,N>& b)
{ return DividesCX<U,T,N>::eval(a,b); }
// Compound division of two Consecutive objects (new side of the diff).
// Dispatches to the in-place variant aeval with the actual parameters; the
// former body called eval(a,b), where `b` is not declared in this scope and
// eval would not modify the target o.
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ return DividesCC<T,U,N>::aeval(o,a); }
// Compound division of a Consecutive by a scalar.
// Dispatches to the in-place variant aeval with the actual parameters; the
// former body called eval(a,b), where `b` is not declared in this scope and
// eval would not modify the target o.
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const U& a)
{ return DividesCX<T,U,N>::aeval(o,a); }
} }