WIP: avx

2022-10-25 23:45:05 +02:00 · 2022-10-25 23:45:05 +02:00 · cf7dcb816b
commit cf7dcb816b
parent 1befb14039
8 changed files with 249 additions and 18 deletions
--- a/src/include/operation/extensions/avx.cc.h
+++ b/src/include/operation/extensions/avx.cc.h
@ -0,0 +1,48 @@
 #ifndef __cxz_avx_cc_h__
 #define __cxz_avx_cc_h__
 #include "avx.h"
 namespace CNORXZ
 {
    // View the memory starting at d as a read-only AVX::ConsecutiveD and return a
    // reference to it (decltype(auto) on the dereference keeps the reference; no copy).
    // NOTE(review): presumably d has to be suitably aligned for __m256d -- confirm with callers.
    inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(const Double* d)
    {
 	const auto* reg = reinterpret_cast<const AVX::ConsecutiveD*>( d );
 	return *reg;
    }
    // View the memory starting at d as a writable AVX::ConsecutiveD and return a
    // reference to it (decltype(auto) on the dereference keeps the reference; no copy).
    // NOTE(review): presumably d has to be suitably aligned for __m256d -- confirm with callers.
    inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(Double* d)
    {
 	auto* reg = reinterpret_cast<AVX::ConsecutiveD*>( d );
 	return *reg;
    }
    // Build an AVX::ConsecutiveD from individually given values.
    // The arguments are handed to _mm256_setr_pd in the order given; their number
    // has to equal AVX_SIZE/sizeof(Double) (checked at compile time).
    template <typename... Args>
    inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::makeA(Args&&... args)
    {
 	static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Double),
 		      "got inconsistent number of arguments");
 	// the statement terminator goes after the braced initializer, not inside of it
 	return AVX::ConsecutiveD { _mm256_setr_pd(args...) };
    }
    // View the memory starting at d as a read-only AVX::ConsecutiveI and return a
    // reference to it (decltype(auto) on the dereference keeps the reference; no copy).
    // NOTE(review): presumably d has to be suitably aligned for __m256i -- confirm with callers.
    inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(const Int* d)
    {
 	const auto* reg = reinterpret_cast<const AVX::ConsecutiveI*>( d );
 	return *reg;
    }
    // View the memory starting at d as a writable AVX::ConsecutiveI and return a
    // reference to it (decltype(auto) on the dereference keeps the reference; no copy).
    // NOTE(review): presumably d has to be suitably aligned for __m256i -- confirm with callers.
    inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(Int* d)
    {
 	auto* reg = reinterpret_cast<AVX::ConsecutiveI*>( d );
 	return *reg;
    }
    // Build an AVX::ConsecutiveI from individually given values.
    // The arguments are handed to _mm256_setr_epi32 in the order given; their number
    // has to equal AVX_SIZE/sizeof(Int) (checked at compile time).
    template <typename... Args>
    inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::makeA(Args&&... args)
    {
 	// _mm256_setr_epi32 packs 32 bit integers, so Int has to be exactly 4 bytes wide
 	static_assert(sizeof(Int) == 32/8, "lib error: Int size has changed");
 	static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Int),
 		      "got inconsistent number of arguments");
 	// the statement terminator goes after the braced initializer, not inside of it
 	return AVX::ConsecutiveI { _mm256_setr_epi32(args...) };
    }
 }
 #endif
--- a/src/include/operation/extensions/avx.h
+++ b/src/include/operation/extensions/avx.h
@ -0,0 +1,51 @@
 #ifndef __cxz_avx_h__
 #define __cxz_avx_h__
 #include <immintrin.h>
 #include "base/base.h"
 // size of one 256 bit AVX register in bytes
 #define AVX_VSIZE (256/8)
 // The MkConsecutive specializations spell this constant AVX_SIZE; provide it as an
 // alias unless it is already defined.
 // NOTE(review): no definition of AVX_SIZE is visible in this change -- confirm it is not
 // meant to come from somewhere else (e.g. the build configuration).
 #ifndef AVX_SIZE
 #define AVX_SIZE AVX_VSIZE
 #endif
 namespace CNORXZ
 {
    // Thin wrapper structs around the AVX vector types provided by <immintrin.h>.
    // They are the types the MkConsecutive specializations in this header refer to.
    namespace AVX
    {
 	// to be defined for all types that are defined in base/types.h
 	// wrapper holding one __m256d value
 	struct ConsecutiveD
 	{
 	    __m256d mD;
 	};
 	// wrapper holding one __m256i value
 	struct ConsecutiveI
 	{
 	    __m256i mD;
 	};
    }
    // Specialization of MkConsecutive for AVX_SIZE/sizeof(Double) values of type Double.
    // The members are only declared here; their definitions live in avx.cc.h and
    // produce AVX::ConsecutiveD objects.
    template <>
    struct MkConsecutive<Double,AVX_SIZE/sizeof(Double)>
    {
 	// reinterpret the memory at d (read-only)
 	static inline decltype(auto) make(const Double* d);
 	// reinterpret the memory at d (writable)
 	static inline decltype(auto) make(Double* d);
 	// build a new object from individually given values
 	template <typename... Args>
 	static inline decltype(auto) makeA(Args&&... args);
    };
    // Specialization of MkConsecutive for AVX_SIZE/sizeof(Int) values of type Int.
    // The type argument has to be Int (not Double): the members take Int pointers and
    // avx.cc.h defines the members of MkConsecutive<Int,AVX_SIZE/sizeof(Int)>, which
    // produce AVX::ConsecutiveI objects.
    template <>
    struct MkConsecutive<Int,AVX_SIZE/sizeof(Int)>
    {
 	// reinterpret the memory at d (read-only)
 	static inline decltype(auto) make(const Int* d);
 	// reinterpret the memory at d (writable)
 	static inline decltype(auto) make(Int* d);
 	// build a new object from individually given values
 	template <typename... Args>
 	static inline decltype(auto) makeA(Args&&... args);
    };
 }
 #endif
--- a/src/include/operation/extensions/extensions.cc.h
+++ b/src/include/operation/extensions/extensions.cc.h
@ -0,0 +1,11 @@
 #ifndef __cxz_extensions_cc_h__
 #define __cxz_extensions_cc_h__
 #include "reg.cc.h"
 #if CXZ_HAVE_AVX
 #include "avx.cc.h"
 #endif
 #endif
--- a/src/include/operation/extensions/extensions.h
+++ b/src/include/operation/extensions/extensions.h
@ -0,0 +1,13 @@
 #ifndef __cxz_extensions_h__
 #define __cxz_extensions_h__
 #include "reg.h"
 #if CXZ_HAVE_AVX
 #include "avx.h"
 #endif
 #include "extensions.cc.h"
 #endif
--- a/src/include/operation/extensions/reg.cc.h
+++ b/src/include/operation/extensions/reg.cc.h
@ -7,22 +7,40 @@
 namespace CNORXZ
 {
    // Generic fallback: view the memory starting at d as a read-only Consecutive<T,N>
    // and return a reference to it (no copy is made).
    template <typename T, SizeT N>
    inline decltype(auto) MkConsecutive<T,N>::make(const T* d)
    {
 	using CRegT = const Consecutive<T,N>;
 	return *reinterpret_cast<CRegT*>(d);
    }
    // Generic fallback: view the memory starting at d as a writable Consecutive<T,N>
    // and return a reference to it (no copy is made).
    template <typename T, SizeT N>
    inline decltype(auto) MkConsecutive<T,N>::make(T* d)
    {
 	using RegT = Consecutive<T,N>;
 	return *reinterpret_cast<RegT*>(d);
    }
    // Generic fallback: brace-initialize a new Consecutive<T,N> from the given values
    // (in the order given) and return it by value.
    template <typename T, SizeT N>
    template <typename... Args>
    inline decltype(auto) MkConsecutive<T,N>::makeA(Args&&... args)
    {
 	using RegT = Consecutive<T,N>;
 	return RegT { args... };
    }
    // Collect the values d[pos.val()+pos.get<I>().val()], one for each index I in Is,
    // and pass them to MkConsecutive<T,N>::makeA (before this change: brace-initialize
    // a PseudoReg<T,N> from them). N is epos_size<EPosT>::value and has to equal the
    // length of the index sequence.
    template <typename T, class EPosT, SizeT... Is>
-    decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
+    inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
    {
 	constexpr SizeT N = epos_size<EPosT>::value;
 	static_assert(N == sizeof...(Is), "got inconsistent index sequence");
-	return PseudoReg<T,N> { d[pos.val()+pos.template get<Is>().val()]... };
+	return MkConsecutive<T,N>::makeA( d[pos.val()+pos.template get<Is>().val()]... );
    }
    template <typename T, class EPosT>
-    decltype(auto) vreg(const T* d, const EPosT& pos)
+    inline decltype(auto) vreg(const T* d, const EPosT& pos)
    {
 	constexpr SizeT N = epos_size<EPosT>::value;
 	static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
 	if constexpr(pos_type_is_consecutive<EPosT>::value){
-	    return *reinterpret_cast<const PseudoReg<T,N>*>
+	    return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
-		(d+pos.val()+pos.template get<0>().val());
 	}
 	else {
 	    return vregi(d, pos, std::make_index_sequence<N>{});
@ -30,13 +48,12 @@ namespace CNORXZ
    }
    template <typename T, class EPosT>
-    decltype(auto) vreg(T* d, const EPosT& pos)
+    inline decltype(auto) vreg(T* d, const EPosT& pos)
    {
 	constexpr SizeT N = epos_size<EPosT>::value;
 	static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
 	if constexpr(pos_type_is_consecutive<EPosT>::value){
-	    return *reinterpret_cast<PseudoReg<T,N>*>
+	    return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
-		(d+pos.val()+pos.template get<0>().val());
 	}
 	else {
 	    return vregi(d, pos, std::make_index_sequence<N>{});
--- a/src/include/operation/extensions/reg.h
+++ b/src/include/operation/extensions/reg.h
@ -11,23 +11,113 @@ namespace CNORXZ
    // Plain aggregate of N values of type T stored in a built-in array.
    // Arr = std::array is deliberately not used here, since the struct is meant to
    // occupy exactly the memory of its N elements.
    template <typename T, SizeT N>
-    struct PseudoReg
+    struct Consecutive
    {
 	T mD[N];
    };
    // Factory for consecutive-value objects of N elements of type T.
    // This primary template is the generic fallback (its members are defined in
    // reg.cc.h in terms of Consecutive<T,N>);
    // specialize for all kinds of available vector registers:
    template <typename T, SizeT N>
    struct MkConsecutive
    {
 	// reinterpret the memory at d (read-only)
 	static inline decltype(auto) make(const T* d);
 	// reinterpret the memory at d (writable)
 	static inline decltype(auto) make(T* d);
 	// build a new object from individually given values
 	template <typename... Args>
 	static inline decltype(auto) makeA(Args&&... args);
    };
    /****************************************
     *   consecutive generating functions   *
     ****************************************/
    // Collects one value of d per index in Is (offsets taken from pos) and builds a
    // consecutive object from them via MkConsecutive<T,N>::makeA.
    template <typename T, class EPosT, SizeT... Is>
-    decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
+    inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
    // Read-only access: if the position type is consecutive, the memory of d is
    // reinterpreted via MkConsecutive<T,N>::make, otherwise the values are collected by vregi.
    // specialize for all kinds of available vector registers:
    template <typename T, class EPosT>
-    decltype(auto) vreg(const T* d, const EPosT& pos);
+    inline decltype(auto) vreg(const T* d, const EPosT& pos);
    // Same as above for non-const data.
    // specialize for all kinds of available vector registers:
    template <typename T, class EPosT>
-    decltype(auto) vreg(T* d, const EPosT& pos);
+    inline decltype(auto) vreg(T* d, const EPosT& pos);
    /******************************
     *   basic operations: plus   *
     ******************************/
    // Declarations only; definitions are not part of this header.
    // NOTE(review): presumably the operations act element-wise -- confirm once implemented.
    // Consecutive + Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
    // Consecutive + scalar
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const T& b);
    // scalar + Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator+(const T& a, const Consecutive<T,N>& b);
    // Compound assignment: a non-member operator+= is a binary operator, so the
    // modified left operand o has to be passed explicitly; it is returned by reference.
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator+=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator+=(Consecutive<T,N>& o, const T& a);
    /*******************************
     *   basic operations: minus   *
     *******************************/
    // Declarations only; definitions are not part of this header.
    // NOTE(review): presumably the operations act element-wise -- confirm once implemented.
    // Consecutive - Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
    // Consecutive - scalar
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const T& b);
    // scalar - Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator-(const T& a, const Consecutive<T,N>& b);
    // Compound assignment: a non-member operator-= is a binary operator, so the
    // modified left operand o has to be passed explicitly; it is returned by reference.
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const T& a);
    /************************************
     *   basic operations: multiplies   *
     ************************************/
    // Declarations only; definitions are not part of this header.
    // NOTE(review): presumably the operations act element-wise -- confirm once implemented.
    // Consecutive * Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
    // Consecutive * scalar
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const T& b);
    // scalar * Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator*(const T& a, const Consecutive<T,N>& b);
    // Compound assignment: a non-member operator*= is a binary operator, so the
    // modified left operand o has to be passed explicitly; it is returned by reference.
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const T& a);
    /*********************************
     *   basic operations: divides   *
     *********************************/
    // Declarations only; definitions are not part of this header.
    // NOTE(review): presumably the operations act element-wise -- confirm once implemented.
    // Consecutive / Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
    // Consecutive / scalar
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const T& b);
    // scalar / Consecutive
    template <typename T, SizeT N>
    constexpr Consecutive<T,N> operator/(const T& a, const Consecutive<T,N>& b);
    // Compound assignment: a non-member operator/= is a binary operator, so the
    // modified left operand o has to be passed explicitly; it is returned by reference.
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
    template <typename T, SizeT N>
    constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const T& a);
    // TODO: Maybe specialize Consecutive (formerly PseudoReg) itself (-> Reg) (?)
 }
 #endif
--- a/src/include/operation/op_types.cc.h
+++ b/src/include/operation/op_types.cc.h
@ -6,6 +6,7 @@
 #include "xpr/pos_type.h"
 #include "xpr/op_xpr.h"
 #include "op_utility.h"
 #include "extensions/extensions.h"
 namespace CNORXZ
 {
--- a/src/include/xpr/pos_type.cc.h
+++ b/src/include/xpr/pos_type.cc.h
@ -575,10 +575,10 @@ namespace CNORXZ
    {
 	if constexpr(is_static_pos_type<OPosT1>::value and is_static_pos_type<OPosT2>::value){
 	    if constexpr(sizeof...(OPosTs) != 0){
-		return OPosT1().val() < OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
+		return OPosT1().val()+1 == OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
 	    }
 	    else {
-		return OPosT1().val() < OPosT2().val();
+		return OPosT1().val()+1 == OPosT2().val();
 	    }
 	}
 	return false;