From fbfd84f4218e9dc7178ff5422a2148d59467f301 Mon Sep 17 00:00:00 2001 From: Christian Zimmermann Date: Sat, 29 Oct 2022 03:08:34 +0200 Subject: [PATCH] more on extensions... --- src/include/operation/extensions/avx.cc.h | 67 ++++--- src/include/operation/extensions/avx.h | 39 ++-- src/include/operation/extensions/reg.cc.h | 230 +++++++++++++++++++--- src/include/operation/extensions/reg.h | 164 ++++++++++----- 4 files changed, 379 insertions(+), 121 deletions(-) diff --git a/src/include/operation/extensions/avx.cc.h b/src/include/operation/extensions/avx.cc.h index 298769a..7c99f55 100644 --- a/src/include/operation/extensions/avx.cc.h +++ b/src/include/operation/extensions/avx.cc.h @@ -6,41 +6,60 @@ namespace CNORXZ { - inline decltype(auto) MkConsecutive::make(const Double* d) + constexpr decltype(auto) PlusCC::eval(const Consecutive& a, + const Consecutive& b) { - return *reinterpret_cast( d ); + Consecutive o; + __m256d av = _mm256_load_pd(a.mD); + __m256d bv = _mm256_load_pd(b.mD); + __m256d ov = _mm256_add_pd(av, bv); + _mm256_store_pd(o.mD, ov); + return o; } - inline decltype(auto) MkConsecutive::make(Double* d) + constexpr decltype(auto) PlusCC::aeval(Consecutive& a, + const Consecutive& b) { - return *reinterpret_cast( d ); - } - - template - inline decltype(auto) MkConsecutive::makeA(Args&&... args) - { - static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Double), - "got inconsistent number of arguments"); - return AVX::ConsecutiveD { _mm256_setr_pd(args...); } + __m256d av = _mm256_load_pd(a.mD); + __m256d bv = _mm256_load_pd(b.mD); + __m256d ov = _mm256_add_pd(av, bv); + _mm256_store_pd(a.mD, ov); + return a; } - inline decltype(auto) MkConsecutive::make(const Int* d) + template + static constexpr decltype(auto) + PlusCX::eval(const Consecutive& a, const X& b) { - return *reinterpret_cast( d ); + Consecutive o; + __m256d av = _mm256_load_pd(a.mD); + __m256d bv = _mm256_set1_pd( static_cast(b) ); + __m256d ov = _mm256_add_pd(av, bv); + _mm256_store_pd(o.mD, ov); + return o; } - - inline decltype(auto) MkConsecutive::make(Int* d) + + template + static constexpr decltype(auto) + PlusCX::aeval(Consecutive& a, const X& b) { - return *reinterpret_cast( d ); + __m256d av = _mm256_load_pd(a.mD); + __m256d bv = _mm256_set1_pd( static_cast(b) ); + __m256d ov = _mm256_add_pd(av, bv); + _mm256_store_pd(a.mD, ov); + return a; } - - template - inline decltype(auto) MkConsecutive::makeA(Args&&... args) + + template + static constexpr decltype(auto) + PlusCX::eval(const X& a, const Consecutive& b) { - static_assert(sizeof(Int) == 32/8, "lib error: Int size has changed"); - static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Int), - "got inconsistent number of arguments"); - return AVX::ConsecutiveI { _mm256_setr_epi32(args...); } + Consecutive o; + __m256d av = _mm256_set1_pd( static_cast(a) ); + __m256d bv = _mm256_load_pd(b.mD); + __m256d ov = _mm256_add_pd(av, bv); + _mm256_store_pd(o.mD, ov); + return o; } } diff --git a/src/include/operation/extensions/avx.h b/src/include/operation/extensions/avx.h index c03dfa2..e402791 100644 --- a/src/include/operation/extensions/avx.h +++ b/src/include/operation/extensions/avx.h @@ -12,40 +12,31 @@ namespace CNORXZ { namespace AVX { - // define for all types that are defined in base/types.h - struct ConsecutiveD - { - __m256d mD; - }; - - struct ConsecutiveI - { - __m256i mD; - }; + static constexpr SizeT ND = AVX_VSIZE/sizeof(Double); } template <> - struct MkConsecutive + struct PlusCC { - static inline decltype(auto) make(const Double* d); + static constexpr decltype(auto) + eval(const Consecutive& a, const Consecutive& b); - static inline decltype(auto) make(Double* d); - - template - static inline decltype(auto) makeA(Args&&... args); + static constexpr decltype(auto) + aeval(Consecutive& a, const Consecutive& b); }; - - template <> - struct MkConsecutive + + template + struct PlusCX { - static inline decltype(auto) make(const Int* d); + static constexpr decltype(auto) + eval(const Consecutive& a, const X& b); - static inline decltype(auto) make(Int* d); + static constexpr decltype(auto) + aeval(Consecutive& a, const X& b); - template - static inline decltype(auto) makeA(Args&&... args); + static constexpr decltype(auto) + eval(const X& a, const Consecutive& b); }; - } #endif diff --git a/src/include/operation/extensions/reg.cc.h b/src/include/operation/extensions/reg.cc.h index 634a2c7..68e19a1 100644 --- a/src/include/operation/extensions/reg.cc.h +++ b/src/include/operation/extensions/reg.cc.h @@ -7,31 +7,12 @@ namespace CNORXZ { - template - inline decltype(auto) MkConsecutive::make(const T* d) - { - return *reinterpret_cast*>(d); - } - - template - inline decltype(auto) MkConsecutive::make(T* d) - { - return *reinterpret_cast*>(d); - } - - template - template - inline decltype(auto) MkConsecutive::makeA(Args&&... args) - { - return Consecutive { args... }; - } - template inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence is) { constexpr SizeT N = epos_size::value; static_assert(N == sizeof...(Is), "got inconsistent index sequence"); - return MkConsecutive::makeA( d[pos.val()+pos.template get().val()]... ); + return Consecutive { args... }; } template @@ -40,7 +21,7 @@ namespace CNORXZ constexpr SizeT N = epos_size::value; static_assert(is_epos_type::value, "got non-epos-type"); if constexpr(pos_type_is_consecutive::value){ - return MkConsecutive::make(d+pos.val()+pos.template get<0>().val()); + return *reinterpret_cast*>(d); } else { return vregi(d, pos, std::make_index_sequence{}); @@ -53,13 +34,218 @@ namespace CNORXZ constexpr SizeT N = epos_size::value; static_assert(is_epos_type::value, "got non-epos-type"); if constexpr(pos_type_is_consecutive::value){ - return MkConsecutive::make(d+pos.val()+pos.template get<0>().val()); + return *reinterpret_cast*>(d); } else { return vregi(d, pos, std::make_index_sequence{}); } } + template + constexpr decltype(auto) consecGet(const T& a) + { + if constexpr(is_consecutive_type::value){ + static_assert(I < consecutive_size::value, + "consecutive index out of range"); + return a.mD[I]; + } + else { + return a; + } + } + + template + constexpr decltype(auto) consecGet(T& a) + { + if constexpr(is_consecutive_type::value){ + static_assert(I < consecutive_size::value, + "consecutive index out of range"); + return a.mD[I]; + } + else { + return a; + } + } + + template + constexpr decltype(auto) consecApply(const F& f, const Args&... args) + { + return f( consecGet(args)... ); + } + + template + constexpr Dst& consecAssign(const F& f, Dst& dst, const Args&... args) + { + f( consecGet(dst), consecGet(args)... ); + return dst; + } + + template + static constexpr decltype(auto) consecFuncI(const F& f, const Args&... args, + std::index_sequence is); + { + typedef decltype(consecApply<0>(f, args...)) OType; + constexpr SizeT N = sizeof...(Is); + return Consecutive { consecApply(f, args...) ... }; + } + + template + constexpr Dst& consecFuncAI(const F& f, Dst& dst, const Args&... args, + std::index_sequence is) + { + ( consecAssign(f, dst, args...), ... ); + return dst; + } + + template + constexpr decltype(auto) consecFunc(const F& f, const Args&... args) + { + return consecFuncI(f, args..., std::make_index_sequence{}); + } + + template + constexpr Dst& consecFuncA(const F& f, Dst& dst, const Args&... args) + { + return consecFuncAI(f, dst, args..., std::make_index_sequence{}); + } + + /****************************** + * basic operations: plus * + ******************************/ + + template + constexpr decltype(auto) + PlusCC::eval(const Consecutive& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x + y; }, a, b ); + } + + template + constexpr Consecutive& + PlusCC::aeval(Consecutive& o, const Consecutive& a) + { + return consecFuncA( [](auto& x, const auto& y) { return x += y; }, a, b ); + } + + template + constexpr decltype(auto) PlusCX::eval(const Consecutive& a, const X& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x + y; }, a, b ); + } + + template + constexpr decltype(auto) PlusCX::eval(const X& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x + y; }, a, b ); + } + + template + constexpr Consecutive& PlusCX::aeval(Consecutive& o, const X& a) + { + return consecFuncA( [](auto& x, const auto& y) { return x += y; }, a, b ); + } + + /******************************* + * basic operations: minus * + *******************************/ + + template + constexpr decltype(auto) operator-(const Consecutive& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b ); + } + + template + constexpr decltype(auto) operator-(const Consecutive& a, const U& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b ); + } + + template + constexpr decltype(auto) operator-(const T& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x - y; }, a, b ); + } + + template + constexpr Consecutive& operator-=(Consecutive& o, const Consecutive& a) + { + return consecFuncA( [](auto& x, const auto& y) { return x -= y; }, a, b ); + } + + template + constexpr Consecutive& operator-=(Consecutive& o, const U& a) + { + return consecFuncA( [](auto& x, const auto& y) { return x -= y; }, a, b ); + } + + /*********************************** + * basic operations: muliplies * + ***********************************/ + + template + constexpr decltype(auto) operator*(const Consecutive& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b ); + } + + template + constexpr decltype(auto) operator*(const Consecutive& a, const U& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b ); + } + + template + constexpr decltype(auto) operator*(const T& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x * y; }, a, b ); + } + + template + constexpr Consecutive& operator*=(Consecutive& o, const Consecutive& a) + { + return consecFuncA( [](const auto& x, const auto& y) { return x *= y; }, a, b ); + } + + template + constexpr Consecutive& operator*=(Consecutive& o, const U& a) + { + return consecFuncA( [](const auto& x, const auto& y) { return x *= y; }, a, b ); + } + + /********************************* + * basic operations: divides * + *********************************/ + + template + constexpr decltype(auto) operator/(const Consecutive& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b ); + } + + template + constexpr decltype(auto) operator/(const Consecutive& a, const U& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b ); + } + + template + constexpr decltype(auto) operator/(const T& a, const Consecutive& b) + { + return consecFunc( [](const auto& x, const auto& y) { return x / y; }, a, b ); + } + + template + constexpr Consecutive& operator/=(Consecutive& o, const Consecutive& a) + { + return consecFuncA( [](const auto& x, const auto& y) { return x /= y; }, a, b ); + } + + template + constexpr Consecutive& operator/=(Consecutive& o, const U& a) + { + return consecFuncA( [](const auto& x, const auto& y) { return x /= y; }, a, b ); + } } #endif diff --git a/src/include/operation/extensions/reg.h b/src/include/operation/extensions/reg.h index f1c2d26..3189186 100644 --- a/src/include/operation/extensions/reg.h +++ b/src/include/operation/extensions/reg.h @@ -6,8 +6,7 @@ namespace CNORXZ { - // pseudo extension type to be returned if extension vector of - // reuqired size is not available + // no use of Arr = std::array here, since I want ensure that // it has exactly a memory size of N template @@ -16,18 +15,23 @@ namespace CNORXZ T mD[N]; }; - // specialize for all kinds of available vector registers: - template - struct MkConsecutive - { - static inline decltype(auto) make(const T* d); + template + struct is_consecutive_type { CXZ_CVAL_FALSE; }; - static inline decltype(auto) make(T* d); + template + struct consecutive_base { typedef T type; }; - template - static inline decltype(auto) makeA(Args&&... args); - }; + template + struct consecutive_size { static constexpr SizeT value = 0; }; + template + struct is_consecutive_type> { CXZ_CVAL_TRUE; }; + + template + struct consecutive_base> { typedef T type; }; + + template + struct consecutive_size> { static constexpr SizeT value = N; }; /**************************************** * consecutive generating functions * @@ -42,81 +46,139 @@ namespace CNORXZ template inline decltype(auto) vreg(T* d, const EPosT& pos); + /****************** + * ConsecFunc * + ******************/ + + template + constexpr decltype(auto) consecGet(const T& a); + + template + constexpr decltype(auto) consecGet(T& a); + + template + constexpr decltype(auto) consecApply(const F& f, const Args&... args); + + template + constexpr Dst& consecAssign(const F& f, Dst& dst, const Args&... args); + + template + constexpr decltype(auto) consecFuncI(const F& f, const Args&... args, + std::index_sequence is); + + template + constexpr Dst& consecFuncAI(const F& f, Dst& dst, const Args&... args, + std::index_sequence is); + + template + constexpr decltype(auto) consecFunc(const F& f, const Args&... args); + + template + constexpr Dst& consecFuncA(const F& f, Dst& dst, const Args&... args); + /****************************** * basic operations: plus * ******************************/ + + template + struct PlusCC + { + static constexpr decltype(auto) + eval(const Consecutive& a, const Consecutive& b); + + static constexpr decltype(auto) + aeval(Consecutive& a, const Consecutive& b); + }; + + template + struct PlusCX + { + static constexpr decltype(auto) + eval(const Consecutive& a, const X& b); + + static constexpr decltype(auto) + aeval(Consecutive& a, const X& b); + + static constexpr decltype(auto) + eval(const X& a, const Consecutive& b); + }; - template - constexpr Consecutive operator+(const Consecutive& a, const Consecutive& b); + template + constexpr decltype(auto) operator+(const Consecutive& a, const Consecutive& b) + { return PlusCC::eval(a,b); } - template - constexpr Consecutive operator+(const Consecutive& a, const T& b); + template + constexpr decltype(auto) operator+(const Consecutive& a, const U& b) + { return PlusCX::eval(a,b); } - template - constexpr Consecutive operator+(const T& a, const Consecutive& b); + template + constexpr decltype(auto) operator+(const T& a, const Consecutive& b) + { return PlusCX::eval(a,b); } - template - constexpr Consecutive operator+=(const Consecutive& a); + template + constexpr Consecutive& operator+=(Consecutive& o, const Consecutive& a) + { return PlusCC::aeval(a,b); } - template - constexpr Consecutive operator+=(const T& a); + template + constexpr Consecutive& operator+=(Consecutive& o, const U& a) + { return PlusCX::aeval(a,b); } /******************************* * basic operations: minus * *******************************/ - template - constexpr Consecutive operator-(const Consecutive& a, const Consecutive& b); + template + constexpr decltype(auto) operator-(const Consecutive& a, const Consecutive& b); - template - constexpr Consecutive operator-(const Consecutive& a, const T& b); + template + constexpr decltype(auto) operator-(const Consecutive& a, const U& b); - template - constexpr Consecutive operator-(const T& a, const Consecutive& b); + template + constexpr decltype(auto) operator-(const T& a, const Consecutive& b); - template - constexpr Consecutive operator-=(const Consecutive& a); + template + constexpr Consecutive& operator-=(Consecutive& o, const Consecutive& a); - template - constexpr Consecutive operator-=(const T& a); + template + constexpr Consecutive& operator-=(Consecutive& o, const U& a); /*********************************** * basic operations: muliplies * ***********************************/ - template - constexpr Consecutive operator*(const Consecutive& a, const Consecutive& b); + template + constexpr decltype(auto) operator*(const Consecutive& a, const Consecutive& b); - template - constexpr Consecutive operator*(const Consecutive& a, const T& b); + template + constexpr decltype(auto) operator*(const Consecutive& a, const U& b); - template - constexpr Consecutive operator*(const T& a, const Consecutive& b); + template + constexpr decltype(auto) operator*(const T& a, const Consecutive& b); - template - constexpr Consecutive operator*=(const Consecutive& a); + template + constexpr Consecutive& operator*=(Consecutive& o, const Consecutive& a); - template - constexpr Consecutive operator*=(const T& a); + template + constexpr Consecutive& operator*=(Consecutive& o, const U& a); /********************************* * basic operations: divides * *********************************/ - template - constexpr Consecutive operator/(const Consecutive& a, const Consecutive& b); + template + constexpr decltype(auto) operator/(const Consecutive& a, const Consecutive& b); - template - constexpr Consecutive operator/(const Consecutive& a, const T& b); + template + constexpr decltype(auto) operator/(const Consecutive& a, const U& b); - template - constexpr Consecutive operator/(const T& a, const Consecutive& b); + template + constexpr decltype(auto) operator/(const T& a, const Consecutive& b); - template - constexpr Consecutive operator/=(const Consecutive& a); + template + constexpr Consecutive& operator/=(Consecutive& o, const Consecutive& a); - template - constexpr Consecutive operator/=(const T& a); + template + constexpr Consecutive& operator/=(Consecutive& o, const U& a); }