diff --git a/CMakeLists.txt b/CMakeLists.txt index 0342bcc..d3d5c2a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 2.8) project(multi_array) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -std=c++11 -Wpedantic -O3 -g") +set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Werror -std=c++11 -Wpedantic -O3 -g -march=native") enable_testing() diff --git a/src/include/mbase_def.h b/src/include/mbase_def.h index f3fb0bb..6d569c5 100644 --- a/src/include/mbase_def.h +++ b/src/include/mbase_def.h @@ -51,6 +51,14 @@ namespace MultiArrayTools // multi_array_operation.h template class Contraction; + + // slice.h + template + class Slice; + + // slice.h + template + class SliceDef; } #endif diff --git a/src/include/pack_num.h b/src/include/pack_num.h index eed17c4..21d85dd 100644 --- a/src/include/pack_num.h +++ b/src/include/pack_num.h @@ -51,7 +51,7 @@ namespace MultiArrayHelper } template - static auto mkLoop( const OpTuple& ot, Expr&& exp ) + static auto mkLoop( const OpTuple& ot, Expr exp ) -> decltype(std::get(ot).loop( PackNum::mkLoop(ot,exp) )) { return std::get(ot).loop( PackNum::mkLoop(ot,exp) ); @@ -92,7 +92,7 @@ namespace MultiArrayHelper } template - static auto mkLoop( const OpTuple& ot, Expr&& exp ) + static auto mkLoop( const OpTuple& ot, Expr exp ) -> decltype(std::get<0>(ot).loop( exp )) { return std::get<0>(ot).loop( exp ); diff --git a/src/include/slice.h b/src/include/slice.h index cf8d0d7..ec6ff8f 100644 --- a/src/include/slice.h +++ b/src/include/slice.h @@ -35,11 +35,26 @@ namespace MultiArrayTools template auto define(const std::shared_ptr&... inds) - -> SliceDef >; + -> SliceDef; private: T* mData; }; + + + template + class SliceDef + { + private: + SliceDef() = default; + + public: + SliceDef(Slice& sl, + const std::shared_ptr&... inds); + + template + SliceDef& operator=(OperationRoot& op); + }; } // end namespace MultiArrayTools @@ -122,8 +137,14 @@ namespace MultiArrayTools //i = std::get(mBlockSizes); return i.setData(data()); } - + /* + SliceDef:: + SliceDef(Slice& sl, + const std::shared_ptr&... inds); + template + SliceDef& SliceDef::operator=(OperationRoot& op); + */ } // end namespace MultiArrayTools diff --git a/src/tests/op_perf_test.cc b/src/tests/op_perf_test.cc index b08775e..f42ecfd 100644 --- a/src/tests/op_perf_test.cc +++ b/src/tests/op_perf_test.cc @@ -174,24 +174,28 @@ namespace { << std::endl; std::vector vres(4*4); - - std::clock_t begin2 = std::clock(); - for(size_t i = 0; i != 1000; ++i){ for(size_t d = 0; d != 4; ++d){ for(size_t p = 0; p != 4; ++p){ const size_t tidx = d*4 + p; vres[tidx] = 0.; - for(size_t a = 0; a != 4; ++a){ - for(size_t b = 0; b != 4; ++b){ - for(size_t c = 0; c != 4; ++c){ - const size_t sidx = d*4*4*4*4*4*4*4 + a*5*4*4*4*4*4 + b*5*4*4*4 + + c*5*4 + p; - vres[tidx] += data[sidx]; + } + } + std::clock_t begin2 = std::clock(); + for(size_t i = 0; i != 1000; ++i){ + for(size_t a = 0; a != 4; ++a){ + for(size_t b = 0; b != 4; ++b){ + for(size_t c = 0; c != 4; ++c){ + for(size_t d = 0; d != 4; ++d){ + for(size_t p = 0; p != 4; ++p){ + const size_t tidx = d*4 + p; + const size_t sidx = d*4*4*4*4*4*4*4 + a*5*4*4*4*4*4 + b*5*4*4*4 + + c*5*4 + p; + vres[tidx] += data[sidx]; + } } } } } } - } std::clock_t end2 = std::clock(); assert( xround(res1.at(mkts(0,0))) == xround(vres[0]) );