get intrinsics vector size from environment + compile with avx -> works
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
Some checks failed
ci/woodpecker/push/woodpecker Pipeline failed
This commit is contained in:
parent
886c47bc24
commit
eee1805f88
8 changed files with 189 additions and 107 deletions
|
@ -2,11 +2,14 @@ cmake_minimum_required(VERSION 3.0)
|
||||||
|
|
||||||
project(cnorxz)
|
project(cnorxz)
|
||||||
|
|
||||||
|
include(cmake/check_avx.cmake)
|
||||||
|
|
||||||
execute_process(COMMAND bash "-c" "git rev-parse HEAD" OUTPUT_VARIABLE GIT_HASH OUTPUT_STRIP_TRAILING_WHITESPACE)
|
execute_process(COMMAND bash "-c" "git rev-parse HEAD" OUTPUT_VARIABLE GIT_HASH OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
execute_process(COMMAND bash "-c" "git tag -l --sort=refname 'v*' | tail -n1" OUTPUT_VARIABLE GIT_TAG OUTPUT_STRIP_TRAILING_WHITESPACE)
|
execute_process(COMMAND bash "-c" "git tag -l --sort=refname 'v*' | tail -n1" OUTPUT_VARIABLE GIT_TAG OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
execute_process(COMMAND bash "-c" "git rev-list -n 1 ${GIT_TAG}" OUTPUT_VARIABLE GIT_TAG_HASH OUTPUT_STRIP_TRAILING_WHITESPACE)
|
execute_process(COMMAND bash "-c" "git rev-list -n 1 ${GIT_TAG}" OUTPUT_VARIABLE GIT_TAG_HASH OUTPUT_STRIP_TRAILING_WHITESPACE)
|
||||||
message(STATUS "${GIT_HASH}")
|
message(STATUS "${GIT_HASH}")
|
||||||
option(RUN_PIPELINE "" OFF)
|
option(RUN_PIPELINE "" OFF)
|
||||||
|
option(SCALAR_BUILD "" OFF)
|
||||||
if(RUN_PIPELINE)
|
if(RUN_PIPELINE)
|
||||||
set(VERSION "v0.0.0-test")
|
set(VERSION "v0.0.0-test")
|
||||||
else()
|
else()
|
||||||
|
@ -40,6 +43,19 @@ else()
|
||||||
endif()
|
endif()
|
||||||
message(STATUS "found absolute install path '${INSTALL_PATH}'")
|
message(STATUS "found absolute install path '${INSTALL_PATH}'")
|
||||||
|
|
||||||
|
# Optional SIMD support: unless a scalar build was requested via the
# SCALAR_BUILD option, probe the compiler for AVX with check_avx() and, when
# the probe succeeds, enable AVX for the targets of this directory.
#
# The condition names the variable instead of expanding it: with
# `${SCALAR_BUILD}` the value is substituted before if() runs, so an empty
# value yields `if(NOT )` and a value that happens to be a variable name is
# dereferenced a second time.
if(NOT SCALAR_BUILD)
  message(STATUS "check for intrinsics")
  check_avx()
  if(AVX_AVAIL)
    message(STATUS "AVX available")
    # CXZ_HAVE_AVX is the macro the C++ headers test to pick the AVX vector size.
    add_definitions("-DCXZ_HAVE_AVX")
    # The intrinsics only compile when the compiler is told to emit AVX code.
    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mavx")
  else()
    message(STATUS "AVX not available")
  endif()
endif()
|
||||||
|
|
||||||
|
message(STATUS "check for libraries")
|
||||||
find_package( GTest REQUIRED )
|
find_package( GTest REQUIRED )
|
||||||
if(GTest_FOUND)
|
if(GTest_FOUND)
|
||||||
include_directories(${GTEST_INCLUDE_DIRS})
|
include_directories(${GTEST_INCLUDE_DIRS})
|
||||||
|
|
1
TODO
1
TODO
|
@ -6,7 +6,6 @@ include/array/array_base.cc.h@120: "TODO: check if container format is trivial"
|
||||||
include/array/array_base.cc.h@164: "check further compatibility of index/range format" [check]
|
include/array/array_base.cc.h@164: "check further compatibility of index/range format" [check]
|
||||||
include/array/array_base.cc.h@319: "check further compatibility of index/range format" [check]
|
include/array/array_base.cc.h@319: "check further compatibility of index/range format" [check]
|
||||||
include/base/dtype.cc.h@23: "for tuple use vector<DType>" [comment]
|
include/base/dtype.cc.h@23: "for tuple use vector<DType>" [comment]
|
||||||
include/memory/allocator.h@27: "get from environment" [urgent]
|
|
||||||
include/ranges/index_base.cc.h@110: "if this assert never applies, remove mPtrId (-> Defaults)" [long]
|
include/ranges/index_base.cc.h@110: "if this assert never applies, remove mPtrId (-> Defaults)" [long]
|
||||||
include/ranges/mrange.cc.h@633: "TODO: ZRange (meta and index pos static!)" [long]
|
include/ranges/mrange.cc.h@633: "TODO: ZRange (meta and index pos static!)" [long]
|
||||||
include/ranges/urange.cc.h@366: "else general transform using DType (better than nothing), to be implemented" [urgent]
|
include/ranges/urange.cc.h@366: "else general transform using DType (better than nothing), to be implemented" [urgent]
|
||||||
|
|
24
cmake/check_avx.cmake
Normal file
24
cmake/check_avx.cmake
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
|
||||||
|
# check_avx()
#
# Probe whether the C++ compiler can build a small AVX program when given
# -mavx. check_cxx_source_compiles stores the outcome in the cache variable
# AVX_AVAIL (true when the snippet compiles and links). The snippet is only
# built, never executed, so this tests the toolchain and not the CPU of the
# machine running the configure step.
#
# Takes no arguments. Kept as a macro so the scoping seen by existing callers
# is unchanged. The CMAKE_REQUIRED_* state is saved before the probe and
# restored afterwards, so the strict flags used here do not leak into later
# check_* calls made by the caller.
macro(check_avx)
  include(CheckCXXSourceCompiles)
  include(CMakePushCheckState)

  cmake_push_check_state()
  # -Werror makes any diagnostic about the intrinsics count as "not available".
  set(CMAKE_REQUIRED_FLAGS "-Wall -Werror -Wpedantic -std=c++17 -mavx")
  check_cxx_source_compiles("
    #include <immintrin.h>
    #include <iostream>
    int main()
    {
      const double a[4] = { 0,0,0,0 };
      const double b[4] = { 0,0,0,0 };
      double o[4] = { 0,0,0,0 };
      __m256d av = _mm256_load_pd(a);
      __m256d bv = _mm256_load_pd(b);
      __m256d ov = _mm256_add_pd(av, bv);
      _mm256_store_pd(o, ov);
      std::cout << o[0] << std::endl;
      return 0;
    }
    "
    AVX_AVAIL
  )
  cmake_pop_check_state()
endmacro()
|
||||||
|
|
|
@ -29,6 +29,7 @@
|
||||||
#include "uuid.h"
|
#include "uuid.h"
|
||||||
#include "utils.h"
|
#include "utils.h"
|
||||||
#include "config.h"
|
#include "config.h"
|
||||||
|
#include "intrin.h"
|
||||||
|
|
||||||
#include "base.cc.h"
|
#include "base.cc.h"
|
||||||
|
|
||||||
|
|
25
src/include/base/intrin.h
Normal file
25
src/include/base/intrin.h
Normal file
|
@ -0,0 +1,25 @@
|
||||||
|
// -*- C++ -*-
/**

   @file include/base/intrin.h
   @brief Definitions related to intrinsics

   Provides MAX_VSIZE, the vector size selected for this build. It is chosen
   from the instruction-set macros passed in by the build system
   (currently only CXZ_HAVE_AVX).

 **/

#ifndef __cxz_intrin_h__
#define __cxz_intrin_h__

// AVX: register width in bits and the derived size in bytes.
#define AVX_BITS 256
#define AVX_VSIZE (AVX_BITS/8)

// Select the vector size: AVX when the build enabled it, otherwise the
// scalar fallback of 1.
#if defined(CXZ_HAVE_AVX)
#define MAX_VSIZE AVX_VSIZE
#else
#define MAX_VSIZE 1
#endif

// Further instruction sets would be added here.

#endif
|
|
@ -9,6 +9,7 @@
|
||||||
#include <cassert>
|
#include <cassert>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include "base/types.h"
|
#include "base/types.h"
|
||||||
|
#include "base/intrin.h"
|
||||||
|
|
||||||
#define MIB_SIZE 1024*1024 // 1MiB
|
#define MIB_SIZE 1024*1024 // 1MiB
|
||||||
#define WARN_SIZE MIB_SIZE*100 // 100 MiB
|
#define WARN_SIZE MIB_SIZE*100 // 100 MiB
|
||||||
|
@ -24,7 +25,7 @@ namespace CNORXZ
|
||||||
typedef T value_type;
|
typedef T value_type;
|
||||||
|
|
||||||
static constexpr SizeT type_size = sizeof(T);
|
static constexpr SizeT type_size = sizeof(T);
|
||||||
static constexpr SizeT N = 32; // get from environment!!!
|
static constexpr SizeT N = MAX_VSIZE;
|
||||||
|
|
||||||
struct VX
|
struct VX
|
||||||
{
|
{
|
||||||
|
|
|
@ -10,10 +10,11 @@ namespace CNORXZ
|
||||||
* PlusCC / PlusCX *
|
* PlusCC / PlusCX *
|
||||||
***********************/
|
***********************/
|
||||||
|
|
||||||
constexpr decltype(auto) PlusCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
PlusCC<Double,Double,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_add_pd(av, bv);
|
__m256d ov = _mm256_add_pd(av, bv);
|
||||||
|
@ -21,8 +22,9 @@ namespace CNORXZ
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr decltype(auto) PlusCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
PlusCC<Double,Double,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
|
@ -32,10 +34,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
PlusCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
|
PlusCX<Double,X,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const X& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
__m256d ov = _mm256_add_pd(av, bv);
|
__m256d ov = _mm256_add_pd(av, bv);
|
||||||
|
@ -44,8 +47,8 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
PlusCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
|
PlusCX<Double,X,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a, const X& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
|
@ -55,10 +58,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
PlusCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
|
PlusCX<Double,X,AVX::ND>::eval(const X& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_add_pd(av, bv);
|
__m256d ov = _mm256_add_pd(av, bv);
|
||||||
|
@ -70,10 +74,11 @@ namespace CNORXZ
|
||||||
* MinusCC / MinusCX *
|
* MinusCC / MinusCX *
|
||||||
*************************/
|
*************************/
|
||||||
|
|
||||||
constexpr decltype(auto) MinusCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
MinusCC<Double,Double,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_sub_pd(av, bv);
|
__m256d ov = _mm256_sub_pd(av, bv);
|
||||||
|
@ -81,8 +86,9 @@ namespace CNORXZ
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr decltype(auto) MinusCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
MinusCC<Double,Double,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
|
@ -92,10 +98,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
MinusCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
|
MinusCX<Double,X,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const X& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
__m256d ov = _mm256_sub_pd(av, bv);
|
__m256d ov = _mm256_sub_pd(av, bv);
|
||||||
|
@ -104,8 +111,8 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
MinusCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
|
MinusCX<Double,X,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a, const X& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
|
@ -115,10 +122,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
MinusCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
|
MinusCX<Double,X,AVX::ND>::eval(const X& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_sub_pd(av, bv);
|
__m256d ov = _mm256_sub_pd(av, bv);
|
||||||
|
@ -130,10 +138,11 @@ namespace CNORXZ
|
||||||
* MultipliesCC / MultipliesCX *
|
* MultipliesCC / MultipliesCX *
|
||||||
***********************************/
|
***********************************/
|
||||||
|
|
||||||
constexpr decltype(auto) MultipliesCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
MultipliesCC<Double,Double,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_mul_pd(av, bv);
|
__m256d ov = _mm256_mul_pd(av, bv);
|
||||||
|
@ -141,8 +150,9 @@ namespace CNORXZ
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr decltype(auto) MultipliesCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
MultipliesCC<Double,Double,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
|
@ -152,10 +162,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
MultipliesCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
|
MultipliesCX<Double,X,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const X& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
__m256d ov = _mm256_mul_pd(av, bv);
|
__m256d ov = _mm256_mul_pd(av, bv);
|
||||||
|
@ -164,8 +175,9 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
MultipliesCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
|
MultipliesCX<Double,X,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a,
|
||||||
|
const X& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
|
@ -175,10 +187,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
MultipliesCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
|
MultipliesCX<Double,X,AVX::ND>::eval(const X& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_mul_pd(av, bv);
|
__m256d ov = _mm256_mul_pd(av, bv);
|
||||||
|
@ -191,10 +204,11 @@ namespace CNORXZ
|
||||||
* DividesCC / DividesCX *
|
* DividesCC / DividesCX *
|
||||||
*****************************/
|
*****************************/
|
||||||
|
|
||||||
constexpr decltype(auto) DividesCC<Double,Double,ND>::eval(const Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
DividesCC<Double,Double,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_div_pd(av, bv);
|
__m256d ov = _mm256_div_pd(av, bv);
|
||||||
|
@ -202,8 +216,9 @@ namespace CNORXZ
|
||||||
return o;
|
return o;
|
||||||
}
|
}
|
||||||
|
|
||||||
constexpr decltype(auto) DividesCC<Double,Double,ND>::aeval(Consecutive<Double,ND>& a,
|
inline decltype(auto)
|
||||||
const Consecutive<Double,ND>& b)
|
DividesCC<Double,Double,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
|
@ -213,10 +228,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
DividesCX<Double,X,ND>::eval(const Consecutive<Double,ND>& a, const X& b)
|
DividesCX<Double,X,AVX::ND>::eval(const Consecutive<Double,AVX::ND>& a,
|
||||||
|
const X& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
__m256d ov = _mm256_div_pd(av, bv);
|
__m256d ov = _mm256_div_pd(av, bv);
|
||||||
|
@ -225,8 +241,8 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
DividesCX<Double,X,ND>::aeval(Consecutive<Double,ND>& a, const X& b)
|
DividesCX<Double,X,AVX::ND>::aeval(Consecutive<Double,AVX::ND>& a, const X& b)
|
||||||
{
|
{
|
||||||
__m256d av = _mm256_load_pd(a.mD);
|
__m256d av = _mm256_load_pd(a.mD);
|
||||||
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
__m256d bv = _mm256_set1_pd( static_cast<Double>(b) );
|
||||||
|
@ -236,10 +252,11 @@ namespace CNORXZ
|
||||||
}
|
}
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
static constexpr decltype(auto)
|
inline decltype(auto)
|
||||||
DividesCX<Double,X,ND>::eval(const X& a, const Consecutive<Double,ND>& b)
|
DividesCX<Double,X,AVX::ND>::eval(const X& a,
|
||||||
|
const Consecutive<Double,AVX::ND>& b)
|
||||||
{
|
{
|
||||||
Consecutive<Double,ND> o;
|
Consecutive<Double,AVX::ND> o;
|
||||||
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
__m256d av = _mm256_set1_pd( static_cast<Double>(a) );
|
||||||
__m256d bv = _mm256_load_pd(b.mD);
|
__m256d bv = _mm256_load_pd(b.mD);
|
||||||
__m256d ov = _mm256_div_pd(av, bv);
|
__m256d ov = _mm256_div_pd(av, bv);
|
||||||
|
|
|
@ -6,105 +6,104 @@
|
||||||
|
|
||||||
#include "base/base.h"
|
#include "base/base.h"
|
||||||
|
|
||||||
#define AVX_VSIZE (256/8)
|
|
||||||
|
|
||||||
namespace CNORXZ
|
namespace CNORXZ
|
||||||
{
|
{
|
||||||
namespace AVX
|
namespace AVX
|
||||||
{
|
{
|
||||||
static constexpr SizeT ND = AVX_VSIZE/sizeof(Double);
|
static constexpr SizeT ND = AVX_VSIZE/sizeof(Double);
|
||||||
|
static constexpr SizeT NF = AVX_VSIZE/sizeof(float);
|
||||||
}
|
}
|
||||||
|
|
||||||
template <>
|
|
||||||
struct PlusCC<Double,Double,ND>
|
|
||||||
{
|
|
||||||
static constexpr decltype(auto)
|
|
||||||
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
template <>
|
||||||
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
struct PlusCC<Double,Double,AVX::ND>
|
||||||
|
{
|
||||||
|
static inline decltype(auto)
|
||||||
|
eval(const Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
|
|
||||||
|
static inline decltype(auto)
|
||||||
|
aeval(Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
struct PlusCX<Double,X,ND>
|
struct PlusCX<Double,X,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const X& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const X& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const X& a, const Consecutive<Double,ND>& b);
|
eval(const X& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct MinusCC<Double,Double,ND>
|
struct MinusCC<Double,Double,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
struct MinusCX<Double,X,ND>
|
struct MinusCX<Double,X,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const X& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const X& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const X& a, const Consecutive<Double,ND>& b);
|
eval(const X& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct MultipliesCC<Double,Double,ND>
|
struct MultipliesCC<Double,Double,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
struct MultipliesCX<Double,X,ND>
|
struct MultipliesCX<Double,X,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const X& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const X& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const X& a, const Consecutive<Double,ND>& b);
|
eval(const X& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <>
|
template <>
|
||||||
struct DividesCC<Double,Double,ND>
|
struct DividesCC<Double,Double,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const Consecutive<Double,ND>& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
|
|
||||||
template <typename X>
|
template <typename X>
|
||||||
struct DividesCX<Double,X,ND>
|
struct DividesCX<Double,X,AVX::ND>
|
||||||
{
|
{
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const Consecutive<Double,ND>& a, const X& b);
|
eval(const Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
aeval(Consecutive<Double,ND>& a, const X& b);
|
aeval(Consecutive<Double,AVX::ND>& a, const X& b);
|
||||||
|
|
||||||
static constexpr decltype(auto)
|
static inline decltype(auto)
|
||||||
eval(const X& a, const Consecutive<Double,ND>& b);
|
eval(const X& a, const Consecutive<Double,AVX::ND>& b);
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in a new issue