more on extensions...

This commit is contained in:
Christian Zimmermann 2022-10-29 03:08:34 +02:00
parent cf7dcb816b
commit fbfd84f421
4 changed files with 379 additions and 121 deletions

View file

@ -6,41 +6,60 @@
namespace CNORXZ
{
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(const Double* d)
constexpr decltype(auto) PlusCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
const Consecutive<Double,ND>& b)
{
return *reinterpret_cast<const AVX::ConsecutiveD*>( d );
Consecutive<Double,ND> o;
__m256d av = _mm256_load_pd(a.mD);
__m256d bv = _mm256_load_pd(b.mD);
__m256d ov = _mm256_add_pd(av, bv);
_mm256_store_pd(o.mD, ov);
return o;
}
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(Double* d)
constexpr decltype(auto) PlusCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
const Consecutive<Double,ND>& b)
{
return *reinterpret_cast<AVX::ConsecutiveD*>( d );
}
template <typename... Args>
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::makeA(Args&&... args)
{
static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Double),
"got inconsistent number of arguments");
return AVX::ConsecutiveD { _mm256_setr_pd(args...); }
__m256d av = _mm256_load_pd(a.mD);
__m256d bv = _mm256_load_pd(b.mD);
__m256d ov = _mm256_add_pd(av, bv);
_mm256_store_pd(a.mD, ov);
return a;
}
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(const Int* d)
template <typename X>
static constexpr decltype(auto)
PlusCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
{
return *reinterpret_cast<const AVX::ConsecutiveI*>( d );
Consecutive<Double,ND> o;
__m256d av = _mm256_load_pd(a.mD);
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
__m256d ov = _mm256_add_pd(av, bv);
_mm256_store_pd(o.mD, ov);
return o;
}
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(Int* d)
template <typename X>
static constexpr decltype(auto)
PlusCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
{
return *reinterpret_cast<AVX::ConsecutiveI*>( d );
__m256d av = _mm256_load_pd(a.mD);
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
__m256d ov = _mm256_add_pd(av, bv);
_mm256_store_pd(a.mD, ov);
return a;
}
template <typename... Args>
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::makeA(Args&&... args)
template <typename X>
static constexpr decltype(auto)
PlusCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
{
static_assert(sizeof(Int) == 32/8, "lib error: Int size has changed");
static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Int),
"got inconsistent number of arguments");
return AVX::ConsecutiveI { _mm256_setr_epi32(args...); }
Consecutive<Double,ND> o;
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
__m256d bv = _mm256_load_pd(b.mD);
__m256d ov = _mm256_add_pd(av, bv);
_mm256_store_pd(o.mD, ov);
return o;
}
}

View file

@ -12,40 +12,31 @@ namespace CNORXZ
{
namespace AVX
{
// define for all types that are defined in base/types.h
struct ConsecutiveD
{
__m256d mD;
};
struct ConsecutiveI
{
__m256i mD;
};
static constexpr SizeT ND = AVX_VSIZE/sizeof(Double);
}
template <>
struct MkConsecutive<Double,AVX_SIZE/sizeof(Double)>
struct PlusCC<Double,Double,ND>
{
static inline decltype(auto) make(const Double* d);
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
static inline decltype(auto) make(Double* d);
template <typename... Args>
static inline decltype(auto) makeA(Args&&... args);
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
};
template <>
struct MkConsecutive<Double,AVX_SIZE/sizeof(Int)>
template <typename X>
struct PlusCX<Double,X,ND>
{
static inline decltype(auto) make(const Int* d);
static constexpr decltype(auto)
eval(const Consecutive<Double,ND>& a, const X& b);
static inline decltype(auto) make(Int* d);
static constexpr decltype(auto)
aeval(Consecutive<Double,ND>& a, const X& b);
template <typename... Args>
static inline decltype(auto) makeA(Args&&... args);
static constexpr decltype(auto)
eval(const X& a, const Consecutive<Double,ND>& b);
};
}
#endif

View file

@ -7,31 +7,12 @@
namespace CNORXZ
{
template <typename T, SizeT N>
inline decltype(auto) MkConsecutive<T,N>::make(const T* d)
{
return *reinterpret_cast<const Consecutive<T,N>*>(d);
}
template <typename T, SizeT N>
inline decltype(auto) MkConsecutive<T,N>::make(T* d)
{
return *reinterpret_cast<Consecutive<T,N>*>(d);
}
template <typename T, SizeT N>
template <typename... Args>
inline decltype(auto) MkConsecutive<T,N>::makeA(Args&&... args)
{
return Consecutive<T,N> { args... };
}
template <typename T, class EPosT, SizeT... Is>
inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
{
constexpr SizeT N = epos_size<EPosT>::value;
static_assert(N == sizeof...(Is), "got inconsistent index sequence");
return MkConsecutive<T,N>::makeA( d[pos.val()+pos.template get<Is>().val()]... );
return Consecutive<T,N> { args... };
}
template <typename T, class EPosT>
@ -40,7 +21,7 @@ namespace CNORXZ
constexpr SizeT N = epos_size<EPosT>::value;
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
if constexpr(pos_type_is_consecutive<EPosT>::value){
return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
return *reinterpret_cast<const Consecutive<T,N>*>(d);
}
else {
return vregi(d, pos, std::make_index_sequence<N>{});
@ -53,13 +34,218 @@ namespace CNORXZ
constexpr SizeT N = epos_size<EPosT>::value;
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
if constexpr(pos_type_is_consecutive<EPosT>::value){
return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
return *reinterpret_cast<Consecutive<T,N>*>(d);
}
else {
return vregi(d, pos, std::make_index_sequence<N>{});
}
}
/** Select component I of a read-only operand.
 *  An operand flagged by is_consecutive_type yields its element mD[I]
 *  (I is checked at compile time against consecutive_size<T>).
 *  Any other operand is handed back unchanged, so the same value is
 *  seen for every I.
 *  @tparam I Component position.
 *  @param a Operand to read from.
 */
template <SizeT I, typename T>
constexpr decltype(auto) consecGet(const T& a)
{
    if constexpr(!is_consecutive_type<T>::value){
        // plain operand: pass through as is
        return a;
    }
    else {
        static_assert(I < consecutive_size<T>::value,
                      "consecutive index out of range");
        return a.mD[I];
    }
}
/** Select component I of a writable operand.
 *  An operand flagged by is_consecutive_type yields its element mD[I]
 *  (I is checked at compile time against consecutive_size<T>).
 *  Any other operand is handed back unchanged.
 *  @tparam I Component position.
 *  @param a Operand to access.
 */
template <SizeT I, typename T>
constexpr decltype(auto) consecGet(T& a)
{
    if constexpr(!is_consecutive_type<T>::value){
        // plain operand: pass through as is
        return a;
    }
    else {
        static_assert(I < consecutive_size<T>::value,
                      "consecutive index out of range");
        return a.mD[I];
    }
}
// Call f on component I of every argument and return whatever f returns.
// Each argument is routed through consecGet<I>, so arguments that are not
// consecutive types are passed to f unchanged.
template <SizeT I, class F, typename... Args>
constexpr decltype(auto) consecApply(const F& f, const Args&... args)
{
return f( consecGet<I>(args)... );
}
// Call f with component I of dst as first argument, followed by component I
// of every further argument (all obtained through consecGet<I>).
// The result of f is discarded; dst itself is returned.
template <SizeT I, class F, typename Dst, typename... Args>
constexpr Dst& consecAssign(const F& f, Dst& dst, const Args&... args)
{
f( consecGet<I>(dst), consecGet<I>(args)... );
return dst;
}
template <class F, typename... Args, SizeT... Is>
static constexpr decltype(auto) consecFuncI(const F& f, const Args&... args,
std::index_sequence<Is...> is);
{
typedef decltype(consecApply<0>(f, args...)) OType;
constexpr SizeT N = sizeof...(Is);
return Consecutive<OType,N> { consecApply<Is>(f, args...) ... };
}
// Run consecAssign<Is>(f, dst, args...) for every index in Is (comma fold,
// i.e. in the order of Is) and return dst.
// The parameter `is` only carries the index pack; its value is not used.
template <class F, typename Dst, typename... Args, SizeT... Is>
constexpr Dst& consecFuncAI(const F& f, Dst& dst, const Args&... args,
std::index_sequence<Is...> is)
{
( consecAssign<Is>(f, dst, args...), ... );
return dst;
}
// Apply f component-wise for the indices 0,...,N-1 and return the result of
// consecFuncI. N has to be given explicitly by the caller, since it does not
// appear in the function parameters.
template <SizeT N, class F, typename... Args>
constexpr decltype(auto) consecFunc(const F& f, const Args&... args)
{
return consecFuncI<F,Args...>(f, args..., std::make_index_sequence<N>{});
}
// Apply the assigning callable f component-wise for the indices 0,...,N-1
// with dst as destination (see consecFuncAI) and return dst.
// N has to be given explicitly by the caller, since it does not appear in
// the function parameters.
template <SizeT N, class F, typename Dst, typename... Args>
constexpr Dst& consecFuncA(const F& f, Dst& dst, const Args&... args)
{
return consecFuncAI<F,Dst,Args...>(f, dst, args..., std::make_index_sequence<N>{});
}
/******************************
* basic operations: plus *
******************************/
/** Generic component-wise sum of two consecutive operands.
 *  @param a Left operand.
 *  @param b Right operand.
 *  @return The object built by consecFunc from the component sums.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto)
PlusCC<T,U,N>::eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x + y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>&
PlusCC<T,U,N>::aeval(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
return consecFuncA( [](auto& x, const auto& y) { return x += y; }, a, b );
}
/** Generic sum of a consecutive operand and an operand of type X,
 *  evaluated component by component (operands are accessed through
 *  consecGet, which passes non-consecutive operands on unchanged).
 *  @param a Consecutive left operand.
 *  @param b Right operand.
 */
template <typename T, typename X, SizeT N>
constexpr decltype(auto) PlusCX<T,X,N>::eval(const Consecutive<T,N>& a, const X& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x + y; }, a, b );
}
/** Generic sum of an operand of type X and a consecutive operand,
 *  evaluated component by component (operands are accessed through
 *  consecGet, which passes non-consecutive operands on unchanged).
 *  @param a Left operand.
 *  @param b Consecutive right operand.
 */
template <typename T, typename X, SizeT N>
constexpr decltype(auto) PlusCX<T,X,N>::eval(const X& a, const Consecutive<T,N>& b)
{
    // The element type of b is T (this template has no parameter U).
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x + y; }, a, b );
}
template <typename T, typename X, SizeT N>
constexpr Consecutive<T,N>& PlusCX<T,X,N>::aeval(Consecutive<T,N>& o, const X& a)
{
return consecFuncA( [](auto& x, const auto& y) { return x += y; }, a, b );
}
/*******************************
* basic operations: minus *
*******************************/
/** Component-wise difference a[i] - b[i] of two consecutive operands.
 *  @return The object built by consecFunc from the component differences.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x - y; }, a, b );
}
/** Subtract b from every component of the consecutive operand a
 *  (b is accessed through consecGet).
 *  @return The object built by consecFunc from the component differences.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const U& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x - y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const T& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b );
}
/** Component-wise in-place difference: o[i] -= a[i].
 *  @param o Destination; modified in place.
 *  @param a Operand subtracted from the destination.
 *  @return Reference to o.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
    // The destination o is passed first, then the operand a (there is no `b`
    // in scope). N is given explicitly because it cannot be deduced.
    return consecFuncA<N>( [](auto& x, const auto& y) { return x -= y; }, o, a );
}
/** In-place difference: subtracts a from every component of o
 *  (a is accessed through consecGet).
 *  @param o Destination; modified in place.
 *  @param a Operand subtracted from the destination.
 *  @return Reference to o.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const U& a)
{
    // The destination o is passed first, then the operand a (there is no `b`
    // in scope). N is given explicitly because it cannot be deduced.
    return consecFuncA<N>( [](auto& x, const auto& y) { return x -= y; }, o, a );
}
/***********************************
* basic operations: multiplies *
***********************************/
/** Component-wise product a[i] * b[i] of two consecutive operands.
 *  @return The object built by consecFunc from the component products.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
/** Multiply every component of the consecutive operand a by b
 *  (b is accessed through consecGet).
 *  @return The object built by consecFunc from the component products.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const U& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const T& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b );
}
/** Component-wise in-place product: o[i] *= a[i].
 *  @param o Destination; modified in place.
 *  @param a Operand the destination is multiplied by.
 *  @return Reference to o.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
    // The lambda takes the destination component as a non-const reference,
    // since it assigns to it. The destination o is passed first, then the
    // operand a (there is no `b` in scope). N is given explicitly because it
    // cannot be deduced.
    return consecFuncA<N>( [](auto& x, const auto& y) { return x *= y; }, o, a );
}
/** In-place product: multiplies every component of o by a
 *  (a is accessed through consecGet).
 *  @param o Destination; modified in place.
 *  @param a Operand the destination is multiplied by.
 *  @return Reference to o.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const U& a)
{
    // The lambda takes the destination component as a non-const reference,
    // since it assigns to it. The destination o is passed first, then the
    // operand a (there is no `b` in scope). N is given explicitly because it
    // cannot be deduced.
    return consecFuncA<N>( [](auto& x, const auto& y) { return x *= y; }, o, a );
}
/*********************************
* basic operations: divides *
*********************************/
/** Component-wise quotient a[i] / b[i] of two consecutive operands.
 *  @return The object built by consecFunc from the component quotients.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x / y; }, a, b );
}
/** Divide every component of the consecutive operand a by b
 *  (b is accessed through consecGet).
 *  @return The object built by consecFunc from the component quotients.
 */
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const U& b)
{
    // N cannot be deduced from the arguments of consecFunc; pass it explicitly.
    return consecFunc<N>( [](const auto& x, const auto& y) { return x / y; }, a, b );
}
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const T& a, const Consecutive<U,N>& b)
{
return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b );
}
/** Component-wise in-place quotient: o[i] /= a[i].
 *  @param o Destination; modified in place.
 *  @param a Operand the destination is divided by.
 *  @return Reference to o.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{
    // The lambda takes the destination component as a non-const reference,
    // since it assigns to it. The destination o is passed first, then the
    // operand a (there is no `b` in scope). N is given explicitly because it
    // cannot be deduced.
    return consecFuncA<N>( [](auto& x, const auto& y) { return x /= y; }, o, a );
}
/** In-place quotient: divides every component of o by a
 *  (a is accessed through consecGet).
 *  @param o Destination; modified in place.
 *  @param a Operand the destination is divided by.
 *  @return Reference to o.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const U& a)
{
    // The lambda takes the destination component as a non-const reference,
    // since it assigns to it. The destination o is passed first, then the
    // operand a (there is no `b` in scope). N is given explicitly because it
    // cannot be deduced.
    return consecFuncA<N>( [](auto& x, const auto& y) { return x /= y; }, o, a );
}
}
#endif

View file

@ -6,8 +6,7 @@
namespace CNORXZ
{
// pseudo extension type to be returned if extension vector of
// required size is not available
// no use of Arr = std::array here, since I want to ensure that
// it has exactly a memory size of N
template <typename T, SizeT N>
@ -16,18 +15,23 @@ namespace CNORXZ
T mD[N];
};
// specialize for all kinds of available vector registers:
template <typename T, SizeT N>
struct MkConsecutive
{
static inline decltype(auto) make(const T* d);
template <typename T>
struct is_consecutive_type { CXZ_CVAL_FALSE; };
static inline decltype(auto) make(T* d);
template <typename T>
struct consecutive_base { typedef T type; };
template <typename... Args>
static inline decltype(auto) makeA(Args&&... args);
};
template <typename T>
struct consecutive_size { static constexpr SizeT value = 0; };
template <typename T, SizeT N>
struct is_consecutive_type<Consecutive<T,N>> { CXZ_CVAL_TRUE; };
template <typename T, SizeT N>
struct consecutive_base<Consecutive<T,N>> { typedef T type; };
template <typename T, SizeT N>
struct consecutive_size<Consecutive<T,N>> { static constexpr SizeT value = N; };
/****************************************
* consecutive generating functions *
@ -42,81 +46,139 @@ namespace CNORXZ
template <typename T, class EPosT>
inline decltype(auto) vreg(T* d, const EPosT& pos);
/******************
* ConsecFunc *
******************/
// Component I of a read-only operand: element mD[I] for consecutive types,
// the operand itself otherwise.
template <SizeT I, typename T>
constexpr decltype(auto) consecGet(const T& a);
// Component I of a writable operand: element mD[I] for consecutive types,
// the operand itself otherwise.
template <SizeT I, typename T>
constexpr decltype(auto) consecGet(T& a);
// Call f on component I of every argument and return its result.
template <SizeT I, class F, typename... Args>
constexpr decltype(auto) consecApply(const F& f, const Args&... args);
// Call f on component I of dst and of every further argument; returns dst.
template <SizeT I, class F, typename Dst, typename... Args>
constexpr Dst& consecAssign(const F& f, Dst& dst, const Args&... args);
// Evaluate consecApply for every index in Is and collect the results in a
// Consecutive of size sizeof...(Is).
template <class F, typename... Args, SizeT... Is>
constexpr decltype(auto) consecFuncI(const F& f, const Args&... args,
std::index_sequence<Is...> is);
// Evaluate consecAssign for every index in Is; returns dst.
template <class F, typename Dst, typename... Args, SizeT... Is>
constexpr Dst& consecFuncAI(const F& f, Dst& dst, const Args&... args,
std::index_sequence<Is...> is);
// Component-wise application of f for the indices 0,...,N-1
// (N has to be specified explicitly by the caller).
template <SizeT N, class F, typename... Args>
constexpr decltype(auto) consecFunc(const F& f, const Args&... args);
// Component-wise assigning application of f for the indices 0,...,N-1
// (N has to be specified explicitly by the caller); returns dst.
template <SizeT N, class F, typename Dst, typename... Args>
constexpr Dst& consecFuncA(const F& f, Dst& dst, const Args&... args);
/******************************
* basic operations: plus *
******************************/
// Addition of two consecutive operands of equal size N.
// Declared as a class template with static members; operator+ and
// operator+= for two Consecutive operands forward to eval and aeval.
template <typename T, typename U, SizeT N>
struct PlusCC
{
// sum of a and b
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
// in-place variant: a is the (non-const) destination, b the operand
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const Consecutive<U,N>& b);
};
// Addition of a consecutive operand (element type T, size N) and an operand
// of arbitrary type X.
// Declared as a class template with static members; the mixed operator+ and
// operator+= overloads forward to eval and aeval.
template <typename T, typename X, SizeT N>
struct PlusCX
{
// sum with the consecutive operand on the left
static constexpr decltype(auto)
eval(const Consecutive<T,N>& a, const X& b);
// in-place variant: a is the (non-const) destination, b the operand
static constexpr decltype(auto)
aeval(Consecutive<T,N>& a, const X& b);
// sum with the consecutive operand on the right
static constexpr decltype(auto)
eval(const X& a, const Consecutive<T,N>& b);
};
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
// Sum of two consecutive operands; forwards to PlusCC<T,U,N>::eval.
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator+(const Consecutive<T,N>& a, const Consecutive<U,N>& b)
{ return PlusCC<T,U,N>::eval(a,b); }
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const T& b);
// Sum of a consecutive operand (left) and an operand of type U (right);
// forwards to PlusCX<T,U,N>::eval.
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator+(const Consecutive<T,N>& a, const U& b)
{ return PlusCX<T,U,N>::eval(a,b); }
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+(const T& a, const Consecutive<T,N>& b);
// Sum of an operand of type T (left) and a consecutive operand (right);
// forwards to PlusCX<U,T,N>::eval. Note the swapped template arguments:
// the first argument of PlusCX is the element type of the consecutive operand.
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator+(const T& a, const Consecutive<U,N>& b)
{ return PlusCX<U,T,N>::eval(a,b); }
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+=(const Consecutive<T,N>& a);
/** In-place sum of two consecutive operands; forwards to PlusCC<T,U,N>::aeval.
 *  @param o Destination; passed as the non-const first argument of aeval.
 *  @param a Operand added to the destination.
 *  @return Reference to the destination type, as produced by aeval.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator+=(Consecutive<T,N>& o, const Consecutive<U,N>& a)
{ return PlusCC<T,U,N>::aeval(o,a); } // the parameters are o and a; there is no b
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+=(const T& a);
/** In-place sum of a consecutive destination and an operand of type U;
 *  forwards to PlusCX<T,U,N>::aeval.
 *  @param o Destination; passed as the non-const first argument of aeval.
 *  @param a Operand added to the destination.
 *  @return Reference to the destination type, as produced by aeval.
 */
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator+=(Consecutive<T,N>& o, const U& a)
{ return PlusCX<T,U,N>::aeval(o,a); } // the parameters are o and a; there is no b
/*******************************
* basic operations: minus *
*******************************/
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const T& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const Consecutive<T,N>& a, const U& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-(const T& a, const Consecutive<T,N>& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator-(const T& a, const Consecutive<U,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-=(const Consecutive<T,N>& a);
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<U,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-=(const T& a);
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const U& a);
/***********************************
* basic operations: multiplies *
***********************************/
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const T& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const Consecutive<T,N>& a, const U& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*(const T& a, const Consecutive<T,N>& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator*(const T& a, const Consecutive<U,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*=(const Consecutive<T,N>& a);
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<U,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*=(const T& a);
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const U& a);
/*********************************
* basic operations: divides *
*********************************/
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const Consecutive<U,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const T& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const Consecutive<T,N>& a, const U& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/(const T& a, const Consecutive<T,N>& b);
template <typename T, typename U, SizeT N>
constexpr decltype(auto) operator/(const T& a, const Consecutive<U,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/=(const Consecutive<T,N>& a);
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<U,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/=(const T& a);
template <typename T, typename U, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const U& a);
}