WIP: avx
This commit is contained in:
parent
1befb14039
commit
cf7dcb816b
8 changed files with 249 additions and 18 deletions
48
src/include/operation/extensions/avx.cc.h
Normal file
48
src/include/operation/extensions/avx.cc.h
Normal file
|
@ -0,0 +1,48 @@
|
||||||
|
|
||||||
|
#ifndef __cxz_avx_cc_h__
|
||||||
|
#define __cxz_avx_cc_h__
|
||||||
|
|
||||||
|
#include "avx.h"
|
||||||
|
|
||||||
|
namespace CNORXZ
|
||||||
|
{
|
||||||
|
// View the AVX_SIZE/sizeof(Double) consecutive values starting at d as one
// read-only AVX::ConsecutiveD. Nothing is copied: the result is a const
// reference to the memory d points to.
// NOTE(review): __m256d members normally require 32-byte alignment -- confirm
// that callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(const Double* d)
{
    const AVX::ConsecutiveD* const reg = reinterpret_cast<const AVX::ConsecutiveD*>( d );
    return *reg;
}
|
||||||
|
|
||||||
|
// Mutable counterpart of make(const Double*): view the values starting at d
// as one AVX::ConsecutiveD and hand back a reference, so writes through the
// result go to the memory d points to.
// NOTE(review): __m256d members normally require 32-byte alignment -- confirm
// that callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::make(Double* d)
{
    AVX::ConsecutiveD* const reg = reinterpret_cast<AVX::ConsecutiveD*>( d );
    return *reg;
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
inline decltype(auto) MkConsecutive<Double,AVX_SIZE/sizeof(Double)>::makeA(Args&&... args)
|
||||||
|
{
|
||||||
|
static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Double),
|
||||||
|
"got inconsistent number of arguments");
|
||||||
|
return AVX::ConsecutiveD { _mm256_setr_pd(args...); }
|
||||||
|
}
|
||||||
|
|
||||||
|
// View the AVX_SIZE/sizeof(Int) consecutive values starting at d as one
// read-only AVX::ConsecutiveI. Nothing is copied: the result is a const
// reference to the memory d points to.
// NOTE(review): __m256i members normally require 32-byte alignment -- confirm
// that callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(const Int* d)
{
    const AVX::ConsecutiveI* const reg = reinterpret_cast<const AVX::ConsecutiveI*>( d );
    return *reg;
}
|
||||||
|
|
||||||
|
// Mutable counterpart of make(const Int*): view the values starting at d as
// one AVX::ConsecutiveI and hand back a reference, so writes through the
// result go to the memory d points to.
// NOTE(review): __m256i members normally require 32-byte alignment -- confirm
// that callers only pass suitably aligned pointers.
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::make(Int* d)
{
    AVX::ConsecutiveI* const reg = reinterpret_cast<AVX::ConsecutiveI*>( d );
    return *reg;
}
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
inline decltype(auto) MkConsecutive<Int,AVX_SIZE/sizeof(Int)>::makeA(Args&&... args)
|
||||||
|
{
|
||||||
|
static_assert(sizeof(Int) == 32/8, "lib error: Int size has changed");
|
||||||
|
static_assert(sizeof...(Args) == AVX_SIZE/sizeof(Int),
|
||||||
|
"got inconsistent number of arguments");
|
||||||
|
return AVX::ConsecutiveI { _mm256_setr_epi32(args...); }
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
51
src/include/operation/extensions/avx.h
Normal file
51
src/include/operation/extensions/avx.h
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
|
||||||
|
#ifndef __cxz_avx_h__
|
||||||
|
#define __cxz_avx_h__
|
||||||
|
|
||||||
|
#include <immintrin.h>
|
||||||
|
|
||||||
|
#include "base/base.h"
|
||||||
|
|
||||||
|
// Width of one AVX vector register in bytes (256 bit / 8).
#define AVX_VSIZE (256/8)
// The MkConsecutive specializations in this header and their definitions in
// avx.cc.h spell the constant AVX_SIZE; provide that name as an alias so both
// spellings resolve to the same value. Guarded in case another header already
// defines it.
#ifndef AVX_SIZE
#define AVX_SIZE AVX_VSIZE
#endif
|
||||||
|
|
||||||
|
namespace CNORXZ
|
||||||
|
{
|
||||||
|
namespace AVX
|
||||||
|
{
|
||||||
|
// define for all types that are defined in base/types.h
|
||||||
|
struct ConsecutiveD
|
||||||
|
{
|
||||||
|
__m256d mD;
|
||||||
|
};
|
||||||
|
|
||||||
|
struct ConsecutiveI
|
||||||
|
{
|
||||||
|
__m256i mD;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct MkConsecutive<Double,AVX_SIZE/sizeof(Double)>
|
||||||
|
{
|
||||||
|
static inline decltype(auto) make(const Double* d);
|
||||||
|
|
||||||
|
static inline decltype(auto) make(Double* d);
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
static inline decltype(auto) makeA(Args&&... args);
|
||||||
|
};
|
||||||
|
|
||||||
|
template <>
|
||||||
|
struct MkConsecutive<Double,AVX_SIZE/sizeof(Int)>
|
||||||
|
{
|
||||||
|
static inline decltype(auto) make(const Int* d);
|
||||||
|
|
||||||
|
static inline decltype(auto) make(Int* d);
|
||||||
|
|
||||||
|
template <typename... Args>
|
||||||
|
static inline decltype(auto) makeA(Args&&... args);
|
||||||
|
};
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
#endif
|
11
src/include/operation/extensions/extensions.cc.h
Normal file
11
src/include/operation/extensions/extensions.cc.h
Normal file
|
@ -0,0 +1,11 @@
|
||||||
|
|
||||||
|
#ifndef __cxz_extensions_cc_h__
|
||||||
|
#define __cxz_extensions_cc_h__
|
||||||
|
|
||||||
|
#include "reg.cc.h"
|
||||||
|
|
||||||
|
#if CXZ_HAVE_AVX
|
||||||
|
#include "avx.cc.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
13
src/include/operation/extensions/extensions.h
Normal file
13
src/include/operation/extensions/extensions.h
Normal file
|
@ -0,0 +1,13 @@
|
||||||
|
|
||||||
|
#ifndef __cxz_extensions_h__
|
||||||
|
#define __cxz_extensions_h__
|
||||||
|
|
||||||
|
#include "reg.h"
|
||||||
|
|
||||||
|
#if CXZ_HAVE_AVX
|
||||||
|
#include "avx.h"
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "extensions.cc.h"
|
||||||
|
|
||||||
|
#endif
|
|
@ -7,22 +7,40 @@
|
||||||
|
|
||||||
namespace CNORXZ
|
namespace CNORXZ
|
||||||
{
|
{
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
inline decltype(auto) MkConsecutive<T,N>::make(const T* d)
|
||||||
|
{
|
||||||
|
return *reinterpret_cast<const Consecutive<T,N>*>(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
inline decltype(auto) MkConsecutive<T,N>::make(T* d)
|
||||||
|
{
|
||||||
|
return *reinterpret_cast<Consecutive<T,N>*>(d);
|
||||||
|
}
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
template <typename... Args>
|
||||||
|
inline decltype(auto) MkConsecutive<T,N>::makeA(Args&&... args)
|
||||||
|
{
|
||||||
|
return Consecutive<T,N> { args... };
|
||||||
|
}
|
||||||
|
|
||||||
template <typename T, class EPosT, SizeT... Is>
|
template <typename T, class EPosT, SizeT... Is>
|
||||||
decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
|
inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is)
|
||||||
{
|
{
|
||||||
constexpr SizeT N = epos_size<EPosT>::value;
|
constexpr SizeT N = epos_size<EPosT>::value;
|
||||||
static_assert(N == sizeof...(Is), "got inconsistent index sequence");
|
static_assert(N == sizeof...(Is), "got inconsistent index sequence");
|
||||||
return PseudoReg<T,N> { d[pos.val()+pos.template get<Is>().val()]... };
|
return MkConsecutive<T,N>::makeA( d[pos.val()+pos.template get<Is>().val()]... );
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class EPosT>
|
template <typename T, class EPosT>
|
||||||
decltype(auto) vreg(const T* d, const EPosT& pos)
|
inline decltype(auto) vreg(const T* d, const EPosT& pos)
|
||||||
{
|
{
|
||||||
constexpr SizeT N = epos_size<EPosT>::value;
|
constexpr SizeT N = epos_size<EPosT>::value;
|
||||||
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
|
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
|
||||||
if constexpr(pos_type_is_consecutive<EPosT>::value){
|
if constexpr(pos_type_is_consecutive<EPosT>::value){
|
||||||
return *reinterpret_cast<const PseudoReg<T,N>*>
|
return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
|
||||||
(d+pos.val()+pos.template get<0>().val());
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return vregi(d, pos, std::make_index_sequence<N>{});
|
return vregi(d, pos, std::make_index_sequence<N>{});
|
||||||
|
@ -30,13 +48,12 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename T, class EPosT>
|
template <typename T, class EPosT>
|
||||||
decltype(auto) vreg(T* d, const EPosT& pos)
|
inline decltype(auto) vreg(T* d, const EPosT& pos)
|
||||||
{
|
{
|
||||||
constexpr SizeT N = epos_size<EPosT>::value;
|
constexpr SizeT N = epos_size<EPosT>::value;
|
||||||
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
|
static_assert(is_epos_type<EPosT>::value, "got non-epos-type");
|
||||||
if constexpr(pos_type_is_consecutive<EPosT>::value){
|
if constexpr(pos_type_is_consecutive<EPosT>::value){
|
||||||
return *reinterpret_cast<PseudoReg<T,N>*>
|
return MkConsecutive<T,N>::make(d+pos.val()+pos.template get<0>().val());
|
||||||
(d+pos.val()+pos.template get<0>().val());
|
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return vregi(d, pos, std::make_index_sequence<N>{});
|
return vregi(d, pos, std::make_index_sequence<N>{});
|
||||||
|
|
|
@ -11,23 +11,113 @@ namespace CNORXZ
|
||||||
// no use of Arr = std::array here, since I want to ensure that
|
// no use of Arr = std::array here, since I want to ensure that
|
||||||
// it has exactly a memory size of N
|
// it has exactly a memory size of N
|
||||||
template <typename T, SizeT N>
|
template <typename T, SizeT N>
|
||||||
struct PseudoReg
|
struct Consecutive
|
||||||
{
|
{
|
||||||
T mD[N];
|
T mD[N];
|
||||||
};
|
};
|
||||||
|
|
||||||
|
// specialize for all kinds of available vector registers:
|
||||||
|
// Factory for Consecutive-like values of N elements of type T.
// This is the generic form; it is meant to be specialized for the available
// vector register types. Declarations only; the members are defined in the
// corresponding .cc.h file.
template <typename T, SizeT N>
struct MkConsecutive
{
    // Read-only view on the values starting at d.
    static inline decltype(auto) make(const T* d);

    // Mutable view on the values starting at d.
    static inline decltype(auto) make(T* d);

    // Build a value from individual scalars.
    template <typename... Args>
    static inline decltype(auto) makeA(Args&&... args);
};
|
||||||
|
|
||||||
|
|
||||||
|
/****************************************
|
||||||
|
* consecutive generating functions *
|
||||||
|
****************************************/
|
||||||
|
|
||||||
template <typename T, class EPosT, SizeT... Is>
|
template <typename T, class EPosT, SizeT... Is>
|
||||||
decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
|
inline decltype(auto) vregi(const T* d, const EPosT& pos, std::index_sequence<Is...> is);
|
||||||
|
|
||||||
// specialize for all kinds of available vector registers:
|
|
||||||
template <typename T, class EPosT>
|
template <typename T, class EPosT>
|
||||||
decltype(auto) vreg(const T* d, const EPosT& pos);
|
inline decltype(auto) vreg(const T* d, const EPosT& pos);
|
||||||
|
|
||||||
// specialize for all kinds of available vector registers:
|
|
||||||
template <typename T, class EPosT>
|
template <typename T, class EPosT>
|
||||||
decltype(auto) vreg(T* d, const EPosT& pos);
|
inline decltype(auto) vreg(T* d, const EPosT& pos);
|
||||||
|
|
||||||
|
/******************************
|
||||||
|
* basic operations: plus *
|
||||||
|
******************************/
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator+(const Consecutive<T,N>& a, const T& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator+(const T& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator+=(const Consecutive<T,N>& a);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator+=(const T& a);
|
||||||
|
|
||||||
|
/*******************************
|
||||||
|
* basic operations: minus *
|
||||||
|
*******************************/
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator-(const Consecutive<T,N>& a, const T& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator-(const T& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator-=(const Consecutive<T,N>& a);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator-=(const T& a);
|
||||||
|
|
||||||
|
/***********************************
|
||||||
|
* basic operations: multiplies *
|
||||||
|
***********************************/
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator*(const Consecutive<T,N>& a, const T& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator*(const T& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator*=(const Consecutive<T,N>& a);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator*=(const T& a);
|
||||||
|
|
||||||
|
/*********************************
|
||||||
|
* basic operations: divides *
|
||||||
|
*********************************/
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator/(const Consecutive<T,N>& a, const T& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator/(const T& a, const Consecutive<T,N>& b);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator/=(const Consecutive<T,N>& a);
|
||||||
|
|
||||||
|
template <typename T, SizeT N>
|
||||||
|
constexpr Consecutive<T,N> operator/=(const T& a);
|
||||||
|
|
||||||
// TODO: Maybe specialize PseudoReg (-> Reg) itself (?)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -6,6 +6,7 @@
|
||||||
#include "xpr/pos_type.h"
|
#include "xpr/pos_type.h"
|
||||||
#include "xpr/op_xpr.h"
|
#include "xpr/op_xpr.h"
|
||||||
#include "op_utility.h"
|
#include "op_utility.h"
|
||||||
|
#include "extensions/extensions.h"
|
||||||
|
|
||||||
namespace CNORXZ
|
namespace CNORXZ
|
||||||
{
|
{
|
||||||
|
|
|
@ -575,10 +575,10 @@ namespace CNORXZ
|
||||||
{
|
{
|
||||||
if constexpr(is_static_pos_type<OPosT1>::value and is_static_pos_type<OPosT2>::value){
|
if constexpr(is_static_pos_type<OPosT1>::value and is_static_pos_type<OPosT2>::value){
|
||||||
if constexpr(sizeof...(OPosTs) != 0){
|
if constexpr(sizeof...(OPosTs) != 0){
|
||||||
return OPosT1().val() < OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
|
return OPosT1().val()+1 == OPosT2().val() and pos_types_consecutive<OPosT2,OPosTs...>::value;
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return OPosT1().val() < OPosT2().val();
|
return OPosT1().val()+1 == OPosT2().val();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return false;
|
return false;
|
||||||
|
|
Loading…
Reference in a new issue