Use Welford one-pass variance algorithm for DataPilot / pivot table

... instead of the naïve algorithm. Short test case with 4 values: 10000000007 10000000011 10000000013 10000000017 Naïve Var: -21845.3333333333 Welford Var: 17.3333314259847 VAR() two-pass: 17.3333333333333 Change-Id: I2f27ab91166551e96c0e467f41bd6e6d49b50295 Reviewed-on: https://gerrit.libreoffice.org/64993 Reviewed-by: Eike Rathke <erack@redhat.com> Tested-by: Jenkins
author: Eike Rathke <erack@redhat.com> 2018-12-11 23:48:37 +0100
committer: Eike Rathke <erack@redhat.com> 2018-12-12 01:26:01 +0100
commit: 3c964980da07892a02d5ac721d80558c459532d0 (patch)
tree: 6aad0644fba0f8372cce7e053c38ba8ed292eadc
parent: d1363c39201e93aa83028fd837fe12f48f79f63b (diff)
2 files changed, 30 insertions, 21 deletions
diff --git a/sc/inc/dptabres.hxx b/sc/inc/dptabres.hxx
index d28e192863c6..0a20f3cffa76 100644
--- a/sc/inc/dptabres.hxx
+++ b/sc/inc/dptabres.hxx
@@ -24,6 +24,7 @@
 #include "dpfilteredcache.hxx"
 #include "calcmacros.hxx"
 #include "dpitemdata.hxx"
+#include "subtotal.hxx"
 
 #include <com/sun/star/sheet/DataPilotFieldOrientation.hpp>
 #include <com/sun/star/sheet/DataPilotFieldReference.hpp>
@@ -144,18 +145,19 @@ struct ScDPRelativePos
 
 //  Possible values for the nCount member:
 //  (greater than 0 counts the collected values)
-const long SC_DPAGG_EMPTY        =  0;  // empty during data collection
-const long SC_DPAGG_DATA_ERROR   = -1;  // error during data collection
-const long SC_DPAGG_RESULT_EMPTY = -2;  // empty result calculated
-const long SC_DPAGG_RESULT_VALID = -3;  // valid result calculated
-const long SC_DPAGG_RESULT_ERROR = -4;  // error in calculated result
+const sal_Int64 SC_DPAGG_EMPTY        =  0;  // empty during data collection
+const sal_Int64 SC_DPAGG_DATA_ERROR   = -1;  // error during data collection
+const sal_Int64 SC_DPAGG_RESULT_EMPTY = -2;  // empty result calculated
+const sal_Int64 SC_DPAGG_RESULT_VALID = -3;  // valid result calculated
+const sal_Int64 SC_DPAGG_RESULT_ERROR = -4;  // error in calculated result
 
 class ScDPAggData
 {
 private:
+    WelfordRunner   maWelford;
     double          fVal;
     double          fAux;
-    long            nCount;
+    sal_Int64       nCount;
     std::unique_ptr<ScDPAggData> pChild;
     std::vector<double> mSortedValues;
 
diff --git a/sc/source/core/data/dptabres.cxx b/sc/source/core/data/dptabres.cxx
index 305f6d8ac711..d989875b7359 100644
--- a/sc/source/core/data/dptabres.cxx
+++ b/sc/source/core/data/dptabres.cxx
@@ -428,15 +428,7 @@ void ScDPAggData::Update( const ScDPValue& rNext, ScSubTotalFunc eFunc, const Sc
         case SUBTOTAL_FUNC_STDP:
         case SUBTOTAL_FUNC_VAR:
         case SUBTOTAL_FUNC_VARP:
-            {
-                // fAux is used to sum up squares
-                if ( !SubTotal::SafePlus( fVal, rNext.mfValue ) )
-                    nCount = -1;                            // -1 for error
-                double fAdd = rNext.mfValue;
-                if ( !SubTotal::SafeMult( fAdd, rNext.mfValue ) ||
-                     !SubTotal::SafePlus( fAux, fAdd ) )
-                    nCount = -1;                            // -1 for error
-            }
+            maWelford.update( rNext.mfValue);
             break;
         case SUBTOTAL_FUNC_MED:
             {
@@ -486,14 +478,19 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
         case SUBTOTAL_FUNC_MED:
         case SUBTOTAL_FUNC_MAX:
         case SUBTOTAL_FUNC_MIN:
+            bError = ( nCount <= 0 );       // no data is an error
+            break;
+
         case SUBTOTAL_FUNC_STDP:
         case SUBTOTAL_FUNC_VARP:
             bError = ( nCount <= 0 );       // no data is an error
+            assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
             break;
 
         case SUBTOTAL_FUNC_STD:
         case SUBTOTAL_FUNC_VAR:
             bError = ( nCount < 2 );        // need at least 2 values
+            assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
             break;
 
         default:
@@ -525,23 +522,33 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
                     fResult = fVal / static_cast<double>(nCount);
                 break;
 
-            //TODO: use safe mul for fVal * fVal
-
             case SUBTOTAL_FUNC_STD:
                 if ( nCount >= 2 )
-                    fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1));
+                {
+                    fResult = maWelford.getVarianceSample();
+                    if (fResult < 0.0)
+                        bError = true;
+                    else
+                        fResult = sqrt( fResult);
+                }
                 break;
             case SUBTOTAL_FUNC_VAR:
                 if ( nCount >= 2 )
-                    fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1);
+                    fResult = maWelford.getVarianceSample();
                 break;
             case SUBTOTAL_FUNC_STDP:
                 if ( nCount > 0 )
-                    fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount));
+                {
+                    fResult = maWelford.getVariancePopulation();
+                    if (fResult < 0.0)
+                        bError = true;
+                    else
+                        fResult = sqrt( fResult);
+                }
                 break;
             case SUBTOTAL_FUNC_VARP:
                 if ( nCount > 0 )
-                    fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount);
+                    fResult = maWelford.getVariancePopulation();
                 break;
             case SUBTOTAL_FUNC_MED:
                 {
author	Eike Rathke <erack@redhat.com>	2018-12-11 23:48:37 +0100
committer	Eike Rathke <erack@redhat.com>	2018-12-12 01:26:01 +0100
commit	3c964980da07892a02d5ac721d80558c459532d0 (patch)
tree	6aad0644fba0f8372cce7e053c38ba8ed292eadc
parent	d1363c39201e93aa83028fd837fe12f48f79f63b (diff)