WIP: avx
This commit is contained in:
parent
1befb14039
commit
cf7dcb816b
8 changed files with 249 additions and 18 deletions
48
src/include/operation/extensions/avx.cc.h
Normal file
48
src/include/operation/extensions/avx.cc.h
Normal file
|
@ -0,0 +1,48 @@
|
|||
|
||||
#ifndef __cxz_avx_cc_h__
|
||||
#define __cxz_avx_cc_h__
|
||||
|
||||
#include "avx.h"
|
||||
|
||||
namespace CNORXZ
|
||||
{
|
||||
// View the memory starting at d as one read-only AVX::ConsecutiveD.
// Nothing is copied: the result is a const reference into the caller's buffer.
// NOTE(review): the cast presumes d meets the alignment of __m256d -- confirm at call sites.
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(const Double* d)
{
    const AVX::ConsecutiveD* const reg = reinterpret_cast<const AVX::ConsecutiveD*>( d );
    return *reg;
}
|
||||
|
||||
// View the memory starting at d as one writable AVX::ConsecutiveD.
// Nothing is copied: the result is a mutable reference into the caller's buffer.
// NOTE(review): the cast presumes d meets the alignment of __m256d -- confirm at call sites.
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(Double* d)
{
    AVX::ConsecutiveD* const reg = reinterpret_cast<AVX::ConsecutiveD*>( d );
    return *reg;
}
|
||||
|
||||
template <typename... Args>
|
||||
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::makeA(Args&&... args)
|
||||
{
|
||||
static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Double),
|
||||
"got inconsistent number of arguments");
|
||||
return AVX::ConsecutiveD { _mm256_setr_pd(args...); }
|
||||
}
|
||||
|
||||
// View the memory starting at d as one read-only AVX::ConsecutiveI.
// Nothing is copied: the result is a const reference into the caller's buffer.
// NOTE(review): the cast presumes d meets the alignment of __m256i -- confirm at call sites.
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(const Int* d)
{
    const AVX::ConsecutiveI* const reg = reinterpret_cast<const AVX::ConsecutiveI*>( d );
    return *reg;
}
|
||||
|
||||
// View the memory starting at d as one writable AVX::ConsecutiveI.
// Nothing is copied: the result is a mutable reference into the caller's buffer.
// NOTE(review): the cast presumes d meets the alignment of __m256i -- confirm at call sites.
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(Int* d)
{
    AVX::ConsecutiveI* const reg = reinterpret_cast<AVX::ConsecutiveI*>( d );
    return *reg;
}
|
||||
|
||||
template <typename... Args>
|
||||
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::makeA(Args&&... args)
|
||||
{
|
||||
static_assert(sizeof(Int) == 32/8, "lib error: Int size has changed");
|
||||
static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Int),
|
||||
"got inconsistent number of arguments");
|
||||
return AVX::ConsecutiveI { _mm256_setr_epi32(args...); }
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
#endif
|
51
src/include/operation/extensions/avx.h
Normal file
51
src/include/operation/extensions/avx.h
Normal file
|
@ -0,0 +1,51 @@
|
|||
|
||||
#ifndef __cxz_avx_h__
|
||||
#define __cxz_avx_h__
|
||||
|
||||
#include <immintrin.h>
|
||||
|
||||
#include "base/base.h"
|
||||
|
||||
// Size of one AVX register in bytes (256 bits).
#define AVX_VSIZE (256/8)
// The MkConsecutive specializations for AVX spell the register size as AVX_SIZE;
// provide that spelling unless something else has already defined it.
#ifndef AVX_SIZE
#define AVX_SIZE AVX_VSIZE
#endif
|
||||
|
||||
namespace CNORXZ
|
||||
{
|
||||
// Thin wrapper structs around the raw 256-bit register types from <immintrin.h>.
namespace AVX
|
||||
{
|
||||
// define for all types that are defined in base/types.h
|
||||
// Wraps a single __m256d value (member mD).
struct ConsecutiveD
|
||||
{
|
||||
__m256d mD;
|
||||
};
|
||||
|
||||
// Wraps a single __m256i value (member mD).
struct ConsecutiveI
|
||||
{
|
||||
__m256i mD;
|
||||
};
|
||||
}
|
||||
|
||||
// Specialization of MkConsecutive for Double with AVX_SIZE/sizeof(Double) elements.
// The member definitions live in avx.cc.h and produce AVX::ConsecutiveD.
template <>
|
||||
struct MkConsecutive<Double,AVX_SIZE/sizeof(Double)>
|
||||
{
|
||||
// reinterpret the memory at d as a read-only AVX::ConsecutiveD
static inline decltype(auto) make(const Double* d);
|
||||
|
||||
// reinterpret the memory at d as a writable AVX::ConsecutiveD
static inline decltype(auto) make(Double* d);
|
||||
|
||||
// build an AVX::ConsecutiveD from AVX_SIZE/sizeof(Double) individual values
template <typename... Args>
|
||||
static inline decltype(auto) makeA(Args&&... args);
|
||||
};
|
||||
|
||||
template <>
|
||||
struct MkConsecutive<Double,AVX_SIZE/sizeof(Int)>
|
||||
{
|
||||
static inline decltype(auto) make(const Int* d);
|
||||
|
||||
static inline decltype(auto) make(Int* d);
|
||||
|
||||
template <typename... Args>
|
||||
static inline decltype(auto) makeA(Args&&... args);
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
11
src/include/operation/extensions/extensions.cc.h
Normal file
11
src/include/operation/extensions/extensions.cc.h
Normal file
|
@ -0,0 +1,11 @@
|
|||
|
||||
#ifndef __cxz_extensions_cc_h__
|
||||
#define __cxz_extensions_cc_h__
|
||||
|
||||
#include "reg.cc.h"
|
||||
|
||||
#if CXZ_HAVE_AVX
|
||||
#include "avx.cc.h"
|
||||
#endif
|
||||
|
||||
#endif
|
13
src/include/operation/extensions/extensions.h
Normal file
13
src/include/operation/extensions/extensions.h
Normal file
|
@ -0,0 +1,13 @@
|
|||
|
||||
#ifndef __cxz_extensions_h__
|
||||
#define __cxz_extensions_h__
|
||||
|
||||
#include "reg.h"
|
||||
|
||||
#if CXZ_HAVE_AVX
|
||||
#include "avx.h"
|
||||
#endif
|
||||
|
||||
#include "extensions.cc.h"
|
||||
|
||||
#endif
|
|
@ -7,22 +7,40 @@
|
|||
|
||||
namespace CNORXZ
|
||||
{
|
||||
template <typename T, SizeT N>
|
||||
inline decltype(auto) MkConsecutive<T,N>::make(const T* d)
|
||||
{
|
||||
return *reinterpret_cast<const Consecutive<T,N>*>(d);
|
||||
}
|
||||
|
||||
template <typename T, SizeT N>
|
||||
inline decltype(auto) MkConsecutive<T,N>::make(T* d)
|
||||
{
|
||||
return *reinterpret_cast<Consecutive<T,N>*>(d);
|
||||
}
|
||||
|
||||
template <typename T, SizeT N>
|
||||
template <typename... Args>
|
||||
inline decltype(auto) MkConsecutive<T,N>::makeA(Args&&... args)
|
||||
{
|
||||
return Consecutive<T,N> { args... };
|
||||
}
|
||||
|
||||
template <typename T, class EPosT, SizeT... Is>
|
||||
decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
|
||||
inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
|
||||
{
|
||||
constexpr SizeT N = epos_size<EPosT>::value;
|
||||
static_assert(N == sizeof...(Is), "got inconsistent index sequence");
|
||||
return PseudoReg<T,N> { d[pos.val()+pos.template get<Is>().val()]... };
|
||||
return MkConsecutive<T,N>::makeA( d[pos.val()+pos.template get<Is>().val()]... );
|
||||
}
|
||||
|
||||
template <typename T, class EPosT>
|
||||
decltype(auto) vreg(const T* d, const EPosT& pos)
|
||||
inline decltype(auto) vreg(const T* d, const EPosT& pos)
|
||||
{
|
||||
constexpr SizeT N = epos_size<EPosT>::value;
|
||||
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
|
||||
if constexpr(pos_type_is_consecutive<EPosT>::value){
|
||||
return *reinterpret_cast<const PseudoReg<T,N>*>
|
||||
(d+pos.val()+pos.template get<0>().val());
|
||||
return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
|
||||
}
|
||||
else {
|
||||
return vregi(d, pos, std::make_index_sequence<N>{});
|
||||
|
@ -30,13 +48,12 @@ namespace CNORXZ
|
|||
}
|
||||
|
||||
template <typename T, class EPosT>
|
||||
decltype(auto) vreg(T* d, const EPosT& pos)
|
||||
inline decltype(auto) vreg(T* d, const EPosT& pos)
|
||||
{
|
||||
constexpr SizeT N = epos_size<EPosT>::value;
|
||||
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
|
||||
if constexpr(pos_type_is_consecutive<EPosT>::value){
|
||||
return *reinterpret_cast<PseudoReg<T,N>*>
|
||||
(d+pos.val()+pos.template get<0>().val());
|
||||
return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
|
||||
}
|
||||
else {
|
||||
return vregi(d, pos, std::make_index_sequence<N>{});
|
||||
|
|
|
@ -11,23 +11,113 @@ namespace CNORXZ
|
|||
// no use of Arr = std::array here, since I want ensure that
|
||||
// it has exactly a memory size of N
|
||||
template <typename T, SizeT N>
|
||||
struct PseudoReg
|
||||
struct Consecutive
|
||||
{
|
||||
T mD[N];
|
||||
};
|
||||
|
||||
// specialize for all kinds of available vector registers:
|
||||
// Factory for consecutive blocks of N values of type T.
// The generic member definitions (reg.cc.h) produce Consecutive<T,N>.
template <typename T, SizeT N>
|
||||
struct MkConsecutive
|
||||
{
|
||||
// reinterpret the memory at d as a read-only block
static inline decltype(auto) make(const T* d);
|
||||
|
||||
// reinterpret the memory at d as a writable block
static inline decltype(auto) make(T* d);
|
||||
|
||||
// build a block from individually supplied values
template <typename... Args>
|
||||
static inline decltype(auto) makeA(Args&&... args);
|
||||
};
|
||||
|
||||
|
||||
/****************************************
|
||||
* consecutive generating functions *
|
||||
****************************************/
|
||||
|
||||
template <typename T, class EPosT, SizeT... Is>
|
||||
decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
|
||||
inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
|
||||
|
||||
// specialize for all kinds of available vector registers:
|
||||
// Read-only access to the block of d addressed by pos (defined in reg.cc.h).
// Only the inline declaration is kept; the superseded non-inline duplicate is dropped.
template <typename T, class EPosT>
inline decltype(auto) vreg(const T* d, const EPosT& pos);
|
||||
|
||||
// specialize for all kinds of available vector registers:
|
||||
// Overload of vreg for non-const data d (defined in reg.cc.h).
// Only the inline declaration is kept; the superseded non-inline duplicate is dropped.
template <typename T, class EPosT>
inline decltype(auto) vreg(T* d, const EPosT& pos);
|
||||
|
||||
/******************************
|
||||
* basic operations: plus *
|
||||
******************************/
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const T& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator+(const T& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator+=(const Consecutive<T,N>& a);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator+=(const T& a);
|
||||
|
||||
/*******************************
|
||||
* basic operations: minus *
|
||||
*******************************/
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const T& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator-(const T& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator-=(const Consecutive<T,N>& a);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator-=(const T& a);
|
||||
|
||||
/***********************************
|
||||
* basic operations: multiplies *
|
||||
***********************************/
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const T& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator*(const T& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator*=(const Consecutive<T,N>& a);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator*=(const T& a);
|
||||
|
||||
/*********************************
|
||||
* basic operations: divides *
|
||||
*********************************/
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const T& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator/(const T& a, const Consecutive<T,N>& b);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator/=(const Consecutive<T,N>& a);
|
||||
|
||||
template <typename T, SizeT N>
|
||||
constexpr Consecutive<T,N> operator/=(const T& a);
|
||||
|
||||
// TODO: Maybe specialize PseudoReg (-> Reg) itself (?)
|
||||
}
|
||||
|
||||
#endif
|
||||
|
|
|
@ -6,6 +6,7 @@
|
|||
#include "xpr/pos_type.h"
|
||||
#include "xpr/op_xpr.h"
|
||||
#include "op_utility.h"
|
||||
#include "extensions/extensions.h"
|
||||
|
||||
namespace CNORXZ
|
||||
{
|
||||
|
|
|
@ -575,10 +575,10 @@ namespace CNORXZ
|
|||
{
|
||||
if constexpr(is_static_pos_type<OPosT1>::value and is_static_pos_type<OPosT2>::value){
|
||||
if constexpr(sizeof...(OPosTs) != 0){
|
||||
return OPosT1().val() < OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
|
||||
return OPosT1().val()+1 == OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
|
||||
}
|
||||
else {
|
||||
return OPosT1().val() < OPosT2().val();
|
||||
return OPosT1().val()+1 == OPosT2().val();
|
||||
}
|
||||
}
|
||||
return false;
|
||||
|
|
Loading…
Reference in a new issue