This commit is contained in:
Christian Zimmermann 2022-10-25 23:45:05 +02:00
parent 1befb14039
commit cf7dcb816b
8 changed files with 249 additions and 18 deletions

View file

@ -0,0 +1,48 @@
#ifndef __cxz_avx_cc_h__
#define __cxz_avx_cc_h__
#include "avx.h"
namespace CNORXZ
{
// View the AVX_SIZE/sizeof(Double) Doubles starting at d as one read-only
// AVX::ConsecutiveD. Nothing is copied: the result is a const reference
// into the caller's memory.
// NOTE(review): __m256d usually requires 32-byte alignment -- confirm that
// callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(const Double* d)
{
    using RegT = AVX::ConsecutiveD;
    const RegT* const reg = reinterpret_cast<const RegT*>( d );
    return *reg;
}
// View the AVX_SIZE/sizeof(Double) Doubles starting at d as one writable
// AVX::ConsecutiveD. The result is a mutable reference into the caller's
// memory, so writes through it modify the underlying array.
// NOTE(review): __m256d usually requires 32-byte alignment -- confirm that
// callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(Double* d)
{
    using RegT = AVX::ConsecutiveD;
    RegT* const reg = reinterpret_cast<RegT*>( d );
    return *reg;
}
// Build an AVX::ConsecutiveD by value from individually passed scalars
// (used when the source elements are not adjacent in memory).
// The number of arguments must equal AVX_SIZE/sizeof(Double); this is
// enforced at compile time. Arguments are handed to _mm256_setr_pd in the
// order given.
template <typename... Args>
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::makeA(Args&&... args)
{
    static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Double),
                  "got inconsistent number of arguments");
    // fix: the terminating ';' belongs after the closing brace, not inside
    // the braced initializer
    return AVX::ConsecutiveD { _mm256_setr_pd(args...) };
}
// View the AVX_SIZE/sizeof(Int) Ints starting at d as one read-only
// AVX::ConsecutiveI. Nothing is copied: the result is a const reference
// into the caller's memory.
// NOTE(review): __m256i usually requires 32-byte alignment -- confirm that
// callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(const Int* d)
{
    using RegT = AVX::ConsecutiveI;
    const RegT* const reg = reinterpret_cast<const RegT*>( d );
    return *reg;
}
// View the AVX_SIZE/sizeof(Int) Ints starting at d as one writable
// AVX::ConsecutiveI. The result is a mutable reference into the caller's
// memory, so writes through it modify the underlying array.
// NOTE(review): __m256i usually requires 32-byte alignment -- confirm that
// callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(Int* d)
{
    using RegT = AVX::ConsecutiveI;
    RegT* const reg = reinterpret_cast<RegT*>( d );
    return *reg;
}
// Build an AVX::ConsecutiveI by value from individually passed scalars
// (used when the source elements are not adjacent in memory).
// _mm256_setr_epi32 packs 32-bit integers, hence the check that Int is
// exactly 4 bytes wide; the number of arguments must equal
// AVX_SIZE/sizeof(Int). Both conditions are enforced at compile time.
template <typename... Args>
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::makeA(Args&&... args)
{
    static_assert(sizeof(Int) == 32/8, "lib error: Int size has changed");
    static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Int),
                  "got inconsistent number of arguments");
    // fix: the terminating ';' belongs after the closing brace, not inside
    // the braced initializer
    return AVX::ConsecutiveI { _mm256_setr_epi32(args...) };
}
}
#endif

View file

@ -0,0 +1,51 @@
#ifndef __cxz_avx_h__
#define __cxz_avx_h__
#include <immintrin.h>
#include "base/base.h"
// Width of one 256-bit AVX register in bytes.
#define AVX_VSIZE (256/8)
// The MkConsecutive specializations and their definitions spell this
// constant AVX_SIZE; provide that name as well so they compile, and keep
// AVX_VSIZE for any code that already uses it.
#define AVX_SIZE AVX_VSIZE
namespace CNORXZ
{
namespace AVX
{
// define for all types that are defined in base/types.h
// Thin aggregate around a single __m256d value. It is the AVX register
// type that MkConsecutive<Double,...> produces.
struct ConsecutiveD { __m256d mD; };
// Thin aggregate around a single __m256i value. It is the AVX register
// type that the Int make/makeA definitions produce.
struct ConsecutiveI { __m256i mD; };
}
// AVX specialization of MkConsecutive for Double, selected when the
// requested element count equals AVX_SIZE/sizeof(Double).
// The members are only declared here; their return types are deduced, so
// the definitions must be visible before first use.
template <>
struct MkConsecutive<Double,AVX_SIZE/sizeof(Double)>
{
    // reinterpret existing read-only memory
    inline static decltype(auto) make(const Double* d);

    // reinterpret existing writable memory
    inline static decltype(auto) make(Double* d);

    // assemble from individually passed values
    template <class... Args>
    inline static decltype(auto) makeA(Args&&... args);
};
// AVX specialization of MkConsecutive for Int, selected when the
// requested element count equals AVX_SIZE/sizeof(Int).
// fix: the element type was written as Double although all members operate
// on Int and the out-of-class definitions target MkConsecutive<Int,...>.
// The members are only declared here; their return types are deduced, so
// the definitions must be visible before first use.
template <>
struct MkConsecutive<Int,AVX_SIZE/sizeof(Int)>
{
    // reinterpret existing read-only memory
    static inline decltype(auto) make(const Int* d);

    // reinterpret existing writable memory
    static inline decltype(auto) make(Int* d);

    // assemble from individually passed values
    template <typename... Args>
    static inline decltype(auto) makeA(Args&&... args);
};
}
#endif

View file

@ -0,0 +1,11 @@
#ifndef __cxz_extensions_cc_h__
#define __cxz_extensions_cc_h__
#include "reg.cc.h"
#if CXZ_HAVE_AVX
#include "avx.cc.h"
#endif
#endif

View file

@ -0,0 +1,13 @@
#ifndef __cxz_extensions_h__
#define __cxz_extensions_h__
#include "reg.h"
#if CXZ_HAVE_AVX
#include "avx.h"
#endif
#include "extensions.cc.h"
#endif

View file

@ -7,22 +7,40 @@
namespace CNORXZ namespace CNORXZ
{ {
// Generic fallback: view the memory starting at d as a read-only
// Consecutive<T,N>. Nothing is copied; the result is a const reference
// into the caller's memory.
template <typename T, SizeT N>
inline decltype(auto) MkConsecutive<T,N>::make(const T* d)
{
    using RegT = Consecutive<T,N>;
    const RegT* const reg = reinterpret_cast<const RegT*>(d);
    return *reg;
}
// Generic fallback: view the memory starting at d as a writable
// Consecutive<T,N>. The result is a mutable reference into the caller's
// memory.
template <typename T, SizeT N>
inline decltype(auto) MkConsecutive<T,N>::make(T* d)
{
    using RegT = Consecutive<T,N>;
    RegT* const reg = reinterpret_cast<RegT*>(d);
    return *reg;
}
// Generic fallback: build a Consecutive<T,N> by value, aggregate-initialized
// from the given arguments in the order they are passed.
template <typename T, SizeT N>
template <typename... Args>
inline decltype(auto) MkConsecutive<T,N>::makeA(Args&&... args)
{
    using RegT = Consecutive<T,N>;
    return RegT { args... };
}
// vregi: gather N = epos_size<EPosT>::value elements of d, taken at the
// offsets pos.val()+pos.get<Is>().val(), into one register-like value.
// Side-by-side diff residue: each line shows the old text followed by the
// new text. The old version aggregate-initializes a PseudoReg<T,N>; the new
// version is marked inline and passes the elements to
// MkConsecutive<T,N>::makeA.
template <typename T, class EPosT, SizeT... Is> template <typename T, class EPosT, SizeT... Is>
decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is) inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
{ {
constexpr SizeT N = epos_size<EPosT>::value; constexpr SizeT N = epos_size<EPosT>::value;
static_assert(N == sizeof...(Is), "got inconsistent index sequence"); static_assert(N == sizeof...(Is), "got inconsistent index sequence");
return PseudoReg<T,N> { d[pos.val()+pos.template get<Is>().val()]... }; return MkConsecutive<T,N>::makeA( d[pos.val()+pos.template get<Is>().val()]... );
} }
// vreg (const overload): produce a register-like value for the position pos
// in d. If pos_type_is_consecutive<EPosT>::value holds, the memory at
// d+pos.val()+pos.get<0>().val() is reinterpreted in place; otherwise the
// elements are gathered one by one through vregi.
// Side-by-side diff residue: each line shows the old text followed by the
// new text. The old version casts to PseudoReg<T,N> directly; the new
// version is marked inline and delegates to MkConsecutive<T,N>::make.
// NOTE(review): the hunk header below appears to elide a line of the
// function body -- confirm against the real file.
template <typename T, class EPosT> template <typename T, class EPosT>
decltype(auto) vreg(const T* d, const EPosT& pos) inline decltype(auto) vreg(const T* d, const EPosT& pos)
{ {
constexpr SizeT N = epos_size<EPosT>::value; constexpr SizeT N = epos_size<EPosT>::value;
static_assert(is_epos_type<EPosT>::value, "got non-epos-type"); static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
if constexpr(pos_type_is_consecutive<EPosT>::value){ if constexpr(pos_type_is_consecutive<EPosT>::value){
return *reinterpret_cast<const PseudoReg<T,N>*> return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
(d+pos.val()+pos.template get<0>().val());
} }
else { else {
return vregi(d, pos, std::make_index_sequence<N>{}); return vregi(d, pos, std::make_index_sequence<N>{});
@ -30,13 +48,12 @@ namespace CNORXZ
} }
template <typename T, class EPosT> template <typename T, class EPosT>
decltype(auto) vreg(T* d, const EPosT& pos) inline decltype(auto) vreg(T* d, const EPosT& pos)
{ {
constexpr SizeT N = epos_size<EPosT>::value; constexpr SizeT N = epos_size<EPosT>::value;
static_assert(is_epos_type<EPosT>::value, "got non-epos-type"); static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
if constexpr(pos_type_is_consecutive<EPosT>::value){ if constexpr(pos_type_is_consecutive<EPosT>::value){
return *reinterpret_cast<PseudoReg<T,N>*> return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
(d+pos.val()+pos.template get<0>().val());
} }
else { else {
return vregi(d, pos, std::make_index_sequence<N>{}); return vregi(d, pos, std::make_index_sequence<N>{});

View file

@ -11,23 +11,113 @@ namespace CNORXZ
// no use of Arr = std::array here, since I want ensure that // no use of Arr = std::array here, since I want ensure that
// it has exactly a memory size of N // it has exactly a memory size of N
// Side-by-side diff residue: the struct is renamed from PseudoReg (old
// text) to Consecutive (new text). Its only member is the plain array
// T mD[N].
template <typename T, SizeT N> template <typename T, SizeT N>
struct PseudoReg struct Consecutive
{ {
T mD[N]; T mD[N];
}; };
// Factory for register-like values made of N elements of type T.
// This is the generic version; specialize it for every kind of available
// vector register.
// The members are only declared here; their return types are deduced, so
// the definitions must be visible before first use.
template <typename T, SizeT N>
struct MkConsecutive
{
    // reinterpret existing read-only memory
    inline static decltype(auto) make(const T* d);

    // reinterpret existing writable memory
    inline static decltype(auto) make(T* d);

    // assemble from individually passed values
    template <class... Args>
    inline static decltype(auto) makeA(Args&&... args);
};
/****************************************
* consecutive generating functions *
****************************************/
// Declarations of the register-generating helpers vregi and vreg (const
// and non-const data pointer).
// Side-by-side diff residue: each declaration line shows the old text
// followed by the new text; the new declarations differ only by the added
// `inline`.
template <typename T, class EPosT, SizeT... Is> template <typename T, class EPosT, SizeT... Is>
decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is); inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
// specialize for all kinds of available vector registers:
template <typename T, class EPosT> template <typename T, class EPosT>
decltype(auto) vreg(const T* d, const EPosT& pos); inline decltype(auto) vreg(const T* d, const EPosT& pos);
// specialize for all kinds of available vector registers:
template <typename T, class EPosT> template <typename T, class EPosT>
decltype(auto) vreg(T* d, const EPosT& pos); inline decltype(auto) vreg(T* d, const EPosT& pos);
/******************************
 *   basic operations: plus   *
 ******************************/
// Addition for Consecutive<T,N>: register + register, register + scalar
// and scalar + register each return a new value.
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const T& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator+(const T& a, const Consecutive<T,N>& b);
// Compound assignment. fix: a non-member binary operator must take both
// operands, so the left-hand side o is now an explicit parameter and is
// returned by reference.
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator+=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator+=(Consecutive<T,N>& o, const T& a);
/*******************************
 *   basic operations: minus   *
 *******************************/
// Subtraction for Consecutive<T,N>: register - register, register - scalar
// and scalar - register each return a new value.
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const T& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator-(const T& a, const Consecutive<T,N>& b);
// Compound assignment. fix: a non-member binary operator must take both
// operands, so the left-hand side o is now an explicit parameter and is
// returned by reference.
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator-=(Consecutive<T,N>& o, const T& a);
/************************************
 *   basic operations: multiplies   *
 ************************************/
// Multiplication for Consecutive<T,N>: register * register,
// register * scalar and scalar * register each return a new value.
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const T& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator*(const T& a, const Consecutive<T,N>& b);
// Compound assignment. fix: a non-member binary operator must take both
// operands, so the left-hand side o is now an explicit parameter and is
// returned by reference.
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator*=(Consecutive<T,N>& o, const T& a);
/*********************************
 *   basic operations: divides   *
 *********************************/
// Division for Consecutive<T,N>: register / register, register / scalar
// and scalar / register each return a new value.
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const T& b);
template <typename T, SizeT N>
constexpr Consecutive<T,N> operator/(const T& a, const Consecutive<T,N>& b);
// Compound assignment. fix: a non-member binary operator must take both
// operands, so the left-hand side o is now an explicit parameter and is
// returned by reference.
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const Consecutive<T,N>& a);
template <typename T, SizeT N>
constexpr Consecutive<T,N>& operator/=(Consecutive<T,N>& o, const T& a);
// TODO: Maybe specialize PseudoReg (-> Reg) itself (?)
} }
#endif #endif

View file

@ -6,6 +6,7 @@
#include "xpr/pos_type.h" #include "xpr/pos_type.h"
#include "xpr/op_xpr.h" #include "xpr/op_xpr.h"
#include "op_utility.h" #include "op_utility.h"
#include "extensions/extensions.h"
namespace CNORXZ namespace CNORXZ
{ {

View file

@ -575,10 +575,10 @@ namespace CNORXZ
{ {
if constexpr(is_static_pos_type<OPosT1>::value and is_static_pos_type<OPosT2>::value){ if constexpr(is_static_pos_type<OPosT1>::value and is_static_pos_type<OPosT2>::value){
if constexpr(sizeof...(OPosTs) != 0){ if constexpr(sizeof...(OPosTs) != 0){
return OPosT1().val() < OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value; return OPosT1().val()+1 == OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
} }
else { else {
return OPosT1().val() < OPosT2().val(); return OPosT1().val()+1 == OPosT2().val();
} }
} }
return false; return false;