Use Welford one-pass variance algorithm for DataPilot / pivot table
... instead of the naïve algorithm. Short test case with 4 values:
10000000007
10000000011
10000000013
10000000017
Naïve Var: -21845.3333333333
Welford Var: 17.3333314259847
VAR() two-pass: 17.3333333333333
Change-Id: I2f27ab91166551e96c0e467f41bd6e6d49b50295
Reviewed-on: https://gerrit.libreoffice.org/64993
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
diff --git a/sc/inc/dptabres.hxx b/sc/inc/dptabres.hxx
index d28e192..0a20f3c 100644
--- a/sc/inc/dptabres.hxx
+++ b/sc/inc/dptabres.hxx
@@ -24,6 +24,7 @@
#include "dpfilteredcache.hxx"
#include "calcmacros.hxx"
#include "dpitemdata.hxx"
#include "subtotal.hxx"
#include <com/sun/star/sheet/DataPilotFieldOrientation.hpp>
#include <com/sun/star/sheet/DataPilotFieldReference.hpp>
@@ -144,18 +145,19 @@ struct ScDPRelativePos
// Possible values for the nCount member:
// (greater than 0 counts the collected values)
const long SC_DPAGG_EMPTY = 0; // empty during data collection
const long SC_DPAGG_DATA_ERROR = -1; // error during data collection
const long SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated
const long SC_DPAGG_RESULT_VALID = -3; // valid result calculated
const long SC_DPAGG_RESULT_ERROR = -4; // error in calculated result
const sal_Int64 SC_DPAGG_EMPTY = 0; // empty during data collection
const sal_Int64 SC_DPAGG_DATA_ERROR = -1; // error during data collection
const sal_Int64 SC_DPAGG_RESULT_EMPTY = -2; // empty result calculated
const sal_Int64 SC_DPAGG_RESULT_VALID = -3; // valid result calculated
const sal_Int64 SC_DPAGG_RESULT_ERROR = -4; // error in calculated result
class ScDPAggData
{
private:
WelfordRunner maWelford;
double fVal;
double fAux;
long nCount;
sal_Int64 nCount;
std::unique_ptr<ScDPAggData> pChild;
std::vector<double> mSortedValues;
diff --git a/sc/source/core/data/dptabres.cxx b/sc/source/core/data/dptabres.cxx
index 305f6d8..d989875 100644
--- a/sc/source/core/data/dptabres.cxx
+++ b/sc/source/core/data/dptabres.cxx
@@ -428,15 +428,7 @@ void ScDPAggData::Update( const ScDPValue& rNext, ScSubTotalFunc eFunc, const Sc
case SUBTOTAL_FUNC_STDP:
case SUBTOTAL_FUNC_VAR:
case SUBTOTAL_FUNC_VARP:
{
// fAux is used to sum up squares
if ( !SubTotal::SafePlus( fVal, rNext.mfValue ) )
nCount = -1; // -1 for error
double fAdd = rNext.mfValue;
if ( !SubTotal::SafeMult( fAdd, rNext.mfValue ) ||
!SubTotal::SafePlus( fAux, fAdd ) )
nCount = -1; // -1 for error
}
maWelford.update( rNext.mfValue);
break;
case SUBTOTAL_FUNC_MED:
{
@@ -486,14 +478,19 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
case SUBTOTAL_FUNC_MED:
case SUBTOTAL_FUNC_MAX:
case SUBTOTAL_FUNC_MIN:
bError = ( nCount <= 0 ); // no data is an error
break;
case SUBTOTAL_FUNC_STDP:
case SUBTOTAL_FUNC_VARP:
bError = ( nCount <= 0 ); // no data is an error
assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
break;
case SUBTOTAL_FUNC_STD:
case SUBTOTAL_FUNC_VAR:
bError = ( nCount < 2 ); // need at least 2 values
assert(bError || nCount == static_cast<sal_Int64>(maWelford.getCount()));
break;
default:
@@ -525,23 +522,33 @@ void ScDPAggData::Calculate( ScSubTotalFunc eFunc, const ScDPSubTotalState& rSub
fResult = fVal / static_cast<double>(nCount);
break;
//TODO: use safe mul for fVal * fVal
case SUBTOTAL_FUNC_STD:
if ( nCount >= 2 )
fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1));
{
fResult = maWelford.getVarianceSample();
if (fResult < 0.0)
bError = true;
else
fResult = sqrt( fResult);
}
break;
case SUBTOTAL_FUNC_VAR:
if ( nCount >= 2 )
fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount-1);
fResult = maWelford.getVarianceSample();
break;
case SUBTOTAL_FUNC_STDP:
if ( nCount > 0 )
fResult = sqrt((fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount));
{
fResult = maWelford.getVariancePopulation();
if (fResult < 0.0)
bError = true;
else
fResult = sqrt( fResult);
}
break;
case SUBTOTAL_FUNC_VARP:
if ( nCount > 0 )
fResult = (fAux - fVal*fVal/static_cast<double>(nCount)) / static_cast<double>(nCount);
fResult = maWelford.getVariancePopulation();
break;
case SUBTOTAL_FUNC_MED:
{