From 59ffd5adcc0dec6d11ebbd7cff5eaa9c30b798e3 Mon Sep 17 00:00:00 2001 From: Christian Zimmermann Date: Thu, 14 Feb 2019 22:04:33 +0100 Subject: [PATCH] use omp simd in performance comparison loop --- src/tests/op_perf_test.cc | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/src/tests/op_perf_test.cc b/src/tests/op_perf_test.cc index 90af492..5f5de8c 100644 --- a/src/tests/op_perf_test.cc +++ b/src/tests/op_perf_test.cc @@ -195,16 +195,22 @@ namespace { } } std::clock_t begin2 = std::clock(); + double* vrptr = vres.data(); + double* dptr = data.data(); + for(size_t j = 0; j != os; ++j) { for(size_t i = 0; i != os; ++i){ for(size_t a = 0; a != 4; ++a){ for(size_t b = 0; b != 4; ++b){ for(size_t c = 0; c != 4; ++c){ for(size_t d = 0; d != 4; ++d){ - for(size_t p = 0; p != 4; ++p){ - const size_t tidx = i*4*4 + d*4 + p; - const size_t sidx = /*i*65536 +*/ d*4*4*4*4*4*4*4 + a*5*4*4*4*4*4 + b*5*4*4*4 + c*5*4 + p; - vres[tidx] += data[sidx]; + const size_t tidx = i*4*4 + d*4; + const size_t sidx = /*i*65536 +*/ d*4*4*4*4*4*4*4 + a*5*4*4*4*4*4 + b*5*4*4*4 + c*5*4; + double* xvrptr = vrptr + tidx; + double* xdptr = dptr + sidx; +#pragma omp simd aligned(xvrptr, xdptr: 32) + for(int p = 0; p < 4; p++){ + xvrptr[p] += xdptr[p]; } } }