summary | refs | log | tree | commit | diff
path: root/sc/source/core
diff options
context:
space:
mode:
author: dante <dante19031999@gmail.com> 2021-05-04 21:09:57 +0200
committer: Tomaž Vajngerl <quikee@gmail.com> 2021-05-10 12:20:11 +0200
commit: a25c9a2925e64f3adb46a749ce18393aa01b1870 (patch)
tree: 3f76e1159bd3efe8af361b3d809d06df07ddf624 /sc/source/core
parent: bdba387bad9dea718e21326ddf025be7af383cfd (diff)
tdf#137679 Use KahanSum for SSE2
Change-Id: I97970cbb7a9562081f9a84b1d81423c80ed7f7f7
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/115113
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Diffstat (limited to 'sc/source/core')
-rw-r--r-- sc/source/core/tool/arraysumSSE2.cxx 52
1 file changed, 46 insertions, 6 deletions
diff --git a/sc/source/core/tool/arraysumSSE2.cxx b/sc/source/core/tool/arraysumSSE2.cxx
index 894675335834..e69f672b6014 100644
--- a/sc/source/core/tool/arraysumSSE2.cxx
+++ b/sc/source/core/tool/arraysumSSE2.cxx
@@ -27,25 +27,65 @@ double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
__m128d sum3 = _mm_setzero_pd();
__m128d sum4 = _mm_setzero_pd();
+ __m128d err1 = _mm_setzero_pd();
+ __m128d err2 = _mm_setzero_pd();
+ __m128d err3 = _mm_setzero_pd();
+ __m128d err4 = _mm_setzero_pd();
+
+ __m128d y, t;
+
for (; i < nUnrolledSize; i += 8)
{
+ // Kahan sum 1
__m128d load1 = _mm_load_pd(pCurrent);
- sum1 = _mm_add_pd(sum1, load1);
+ y = _mm_sub_pd(load1, err1);
+ t = _mm_add_pd(sum1, y);
+ err1 = _mm_sub_pd(_mm_sub_pd(t, sum1), y);
+ sum1 = t;
pCurrent += 2;
+ // Kahan sum 2
__m128d load2 = _mm_load_pd(pCurrent);
- sum2 = _mm_add_pd(sum2, load2);
+ y = _mm_sub_pd(load2, err2);
+ t = _mm_add_pd(sum2, y);
+ err2 = _mm_sub_pd(_mm_sub_pd(t, sum2), y);
+ sum2 = t;
pCurrent += 2;
+ // Kahan sum 3
__m128d load3 = _mm_load_pd(pCurrent);
- sum3 = _mm_add_pd(sum3, load3);
+ y = _mm_sub_pd(load3, err3);
+ t = _mm_add_pd(sum3, y);
+ err3 = _mm_sub_pd(_mm_sub_pd(t, sum3), y);
+ sum3 = t;
pCurrent += 2;
+ // Kahan sum 4
__m128d load4 = _mm_load_pd(pCurrent);
- sum4 = _mm_add_pd(sum4, load4);
+ y = _mm_sub_pd(load4, err4);
+ t = _mm_add_pd(sum4, y);
+ err4 = _mm_sub_pd(_mm_sub_pd(t, sum4), y);
+ sum4 = t;
pCurrent += 2;
}
- sum1 = _mm_add_pd(_mm_add_pd(sum1, sum2), _mm_add_pd(sum3, sum4));
+
+ // Now we combine pairwise summation with Kahan summation
+
+ // sum 1 + sum 2
+ y = _mm_sub_pd(sum2, err1);
+ t = _mm_add_pd(sum1, y);
+ err1 = _mm_sub_pd(_mm_sub_pd(t, sum1), y);
+ sum1 = t;
+
+ // sum 3 + sum 4
+ y = _mm_sub_pd(sum4, err3);
+ t = _mm_add_pd(sum3, y);
+ sum3 = t;
+
+ // sum 1 + sum 3
+ y = _mm_sub_pd(sum3, err1);
+ t = _mm_add_pd(sum1, y);
+ sum1 = t;
double temp;
@@ -62,4 +102,4 @@ double ArraySumFunctor::executeSSE2(size_t& i, const double* pCurrent) const
return 0.0;
#endif
}
-} \ No newline at end of file
+}