tdf#133858 reduce the double-ref range to data content
in certain matrix formulas like SUM(IF(A1=G:G, H:H)*B1/B2) where whole
columns are used for comparison in the condition of IF ultimately
followed by a reducer like SUM. In such cases we can safely reduce the
double-refs involved in the comparison to the sheet area where there is
data before converting the data to ScMatrix.
This is a more restricted version of Noel's fix in
37ffe509ef011357123642577c04ff296d59ce68
Change-Id: I1c2e8985adedb3f4c4648f541fb0e8e7d0fae033
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109050
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
(cherry picked from commit 65167a9265acfea04733b5ff6ee3220a9da624f4)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109118
Tested-by: Jenkins
Reviewed-by: Dennis Francis <dennis.francis@collabora.com>
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index bd1d1f0..73669e8 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -1685,18 +1685,25 @@ void FormulaCompiler::Factor()
HandleIIOpCode(pFacToken, pArgArray,
std::min(nSepCount, static_cast<sal_uInt32>(FORMULA_MAXPARAMSII)));
}
bool bDone = false;
if (bBadName)
; // nothing, keep current token for return
else if (eOp != ocClose)
SetError( FormulaError::PairExpected);
else
{
NextToken();
bDone = true;
}
// Jumps are just normal functions for the FunctionAutoPilot tree view
if (!mbJumpCommandReorder && pFacToken->GetType() == svJump)
pFacToken = new FormulaFAPToken( pFacToken->GetOpCode(), nSepCount, pFacToken );
else
pFacToken->SetByte( nSepCount );
PutCode( pFacToken );
if (bDone)
AnnotateOperands();
}
else if (IsOpCodeJumpCommand(eOp))
{
diff --git a/include/formula/FormulaCompiler.hxx b/include/formula/FormulaCompiler.hxx
index 9fdd5a5..c455ca4 100644
--- a/include/formula/FormulaCompiler.hxx
+++ b/include/formula/FormulaCompiler.hxx
@@ -334,6 +334,8 @@ protected:
// Called from CompileTokenArray() after RPN code generation is done.
virtual void PostProcessCode() {}
virtual void AnnotateOperands() {}
OUString aCorrectedFormula; // autocorrected Formula
OUString aCorrectedSymbol; // autocorrected Symbol
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index 5299cf5..b051f3a 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -514,10 +514,12 @@ private:
bool HandleIIOpCodeInternal(formula::FormulaToken* token, formula::FormulaToken*** pppToken, sal_uInt8 nNumParams);
bool SkipImplicitIntersectionOptimization(const formula::FormulaToken* token) const;
virtual void PostProcessCode() override;
virtual void AnnotateOperands() override;
static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken* token, int parameter);
void ReplaceDoubleRefII(formula::FormulaToken** ppDoubleRefTok);
bool AdjustSumRangeShape(const ScComplexRefData& rBaseRange, ScComplexRefData& rSumRange);
void CorrectSumRange(const ScComplexRefData& rBaseRange, ScComplexRefData& rSumRange, formula::FormulaToken** ppSumRangeToken);
void AnnotateTrimOnDoubleRefs();
};
#endif
diff --git a/sc/inc/refdata.hxx b/sc/inc/refdata.hxx
index a2c9313..9b57fe8 100644
--- a/sc/inc/refdata.hxx
+++ b/sc/inc/refdata.hxx
@@ -124,6 +124,11 @@ struct ScComplexRefData
{
ScSingleRefData Ref1;
ScSingleRefData Ref2;
bool bTrimToData;
ScComplexRefData() :
bTrimToData(false)
{}
void InitFlags()
{ Ref1.InitFlags(); Ref2.InitFlags(); }
@@ -198,6 +203,9 @@ struct ScComplexRefData
bool IsDeleted() const;
bool IsTrimToData() const { return bTrimToData; }
void SetTrimToData(bool bSet) { bTrimToData = bSet; }
#if DEBUG_FORMULA_COMPILER
void Dump( int nIndent = 0 ) const;
#endif
diff --git a/sc/qa/unit/ucalc.hxx b/sc/qa/unit/ucalc.hxx
index e72e2e9..7c38201 100644
--- a/sc/qa/unit/ucalc.hxx
+++ b/sc/qa/unit/ucalc.hxx
@@ -159,6 +159,7 @@ public:
void testFormulaCompilerImplicitIntersection1ParamWithChange();
void testFormulaCompilerImplicitIntersection1NoGroup();
void testFormulaCompilerImplicitIntersectionOperators();
void testFormulaAnnotateTrimOnDoubleRefs();
void testFormulaRefUpdate();
void testFormulaRefUpdateRange();
void testFormulaRefUpdateSheets();
@@ -607,6 +608,7 @@ public:
CPPUNIT_TEST(testFormulaCompilerImplicitIntersection1ParamWithChange);
CPPUNIT_TEST(testFormulaCompilerImplicitIntersection1NoGroup);
CPPUNIT_TEST(testFormulaCompilerImplicitIntersectionOperators);
CPPUNIT_TEST(testFormulaAnnotateTrimOnDoubleRefs);
CPPUNIT_TEST(testFormulaRefUpdate);
CPPUNIT_TEST(testFormulaRefUpdateRange);
CPPUNIT_TEST(testFormulaRefUpdateSheets);
diff --git a/sc/qa/unit/ucalc_formula.cxx b/sc/qa/unit/ucalc_formula.cxx
index 28b078a..a908ea3 100644
--- a/sc/qa/unit/ucalc_formula.cxx
+++ b/sc/qa/unit/ucalc_formula.cxx
@@ -1432,6 +1432,115 @@ void Test::testFormulaCompilerImplicitIntersectionOperators()
m_pDoc->DeleteTab(0);
}
void Test::testFormulaAnnotateTrimOnDoubleRefs()
{
m_pDoc->InsertTab(0, "Test");
sc::AutoCalcSwitch aACSwitch(*m_pDoc, true); // turn auto calc on.
constexpr sal_Int32 nCols = 2;
constexpr sal_Int32 nRows = 5;
// Values in A1:B5
constexpr sal_Int32 aMat[nRows][nCols] = {
{4, 50},
{5, 30},
{4, 40},
{0, 70},
{5, 90}
};
for (sal_Int32 nCol = 0; nCol < nCols; ++nCol)
{
for (sal_Int32 nRow = 0; nRow < nRows; ++nRow)
m_pDoc->SetValue(nCol, nRow, 0, aMat[nRow][nCol]);
}
m_pDoc->SetValue(2, 0, 0, 4); // C1 = 4
m_pDoc->SetValue(3, 0, 0, 5); // D1 = 5
ScMarkData aMark(m_pDoc->GetSheetLimits());
aMark.SelectOneTable(0);
struct TestCase
{
OUString aFormula;
ScRange aTrimmableRange;
double fResult;
bool bMatrixFormula;
};
constexpr sal_Int32 nTestCases = 5;
TestCase aTestCases[nTestCases] = {
{
"=SUM(IF($C$1=A:A;B:B)/10*D1)",
ScRange(0, 0, 0, 0, 1048575, 0),
45.0,
true
},
{
"=SUM(IF(A:A=5;B:B)/10*D1)",
ScRange(0, 0, 0, 0, 1048575, 0),
60.0,
true
},
{
"=SUM(IF($C$1=A:A;B:B;B:B)/10*D1)", // IF has else clause
ScRange(-1, -1, -1, -1, -1, -1), // Has no trimmable double-ref.
140.0,
true
},
{
"=SUM(IF($C$1=A:A;B:B)/10*D1)",
ScRange(-1, -1, -1, -1, -1, -1), // Has no trimmable double-ref.
25,
false // Not in matrix mode.
},
{
"=SUMPRODUCT(A:A=$C$1; 1-(A:A=$C$1))",
ScRange(-1, -1, -1, -1, -1, -1), // Has no trimmable double-ref.
0.0,
false // Not in matrix mode.
},
};
for (sal_Int32 nTestIdx = 0; nTestIdx < nTestCases; ++nTestIdx)
{
TestCase& rTestCase = aTestCases[nTestIdx];
if (rTestCase.bMatrixFormula)
m_pDoc->InsertMatrixFormula(4, 0, 4, 0, aMark, rTestCase.aFormula); // Formula in E1
else
m_pDoc->SetString(ScAddress(4, 0, 0), rTestCase.aFormula); // Formula in E1
std::string aMsgStart = "TestCase#" + std::to_string(nTestIdx + 1) + " : ";
CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsgStart + "Incorrect formula result", rTestCase.fResult, m_pDoc->GetValue(ScAddress(4, 0, 0)));
ScTokenArray* pCode = getTokens(*m_pDoc, ScAddress(4, 0, 0));
sal_Int32 nLen = pCode->GetCodeLen();
FormulaToken** pRPNArray = pCode->GetCode();
for (sal_Int32 nIdx = 0; nIdx < nLen; ++nIdx)
{
FormulaToken* pTok = pRPNArray[nIdx];
if (pTok && pTok->GetType() == svDoubleRef)
{
ScRange aRange = pTok->GetDoubleRef()->toAbs(*m_pDoc, ScAddress(4, 0, 0));
if (aRange == rTestCase.aTrimmableRange)
CPPUNIT_ASSERT_MESSAGE(aMsgStart + "Double ref is incorrectly flagged as not trimmable to data",
pTok->GetDoubleRef()->IsTrimToData());
else
CPPUNIT_ASSERT_MESSAGE(aMsgStart + "Double ref is incorrectly flagged as trimmable to data",
!pTok->GetDoubleRef()->IsTrimToData());
}
}
}
m_pDoc->DeleteTab(0);
}
void Test::testFormulaRefUpdate()
{
m_pDoc->InsertTab(0, "Formula");
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index 6fcee51..f0e116a 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -6132,6 +6132,11 @@ void ScCompiler::PostProcessCode()
mPendingImplicitIntersectionOptimizations.clear();
}
void ScCompiler::AnnotateOperands()
{
AnnotateTrimOnDoubleRefs();
}
void ScCompiler::ReplaceDoubleRefII(FormulaToken** ppDoubleRefTok)
{
const ScComplexRefData* pRange = (*ppDoubleRefTok)->GetDoubleRef();
@@ -6303,4 +6308,97 @@ void ScCompiler::CorrectSumRange(const ScComplexRefData& rBaseRange,
pNewSumRangeTok->IncRef();
}
void ScCompiler::AnnotateTrimOnDoubleRefs()
{
if (!pCode || !(pCode -1) || !(*(pCode - 1)))
return;
// OpCode of the "root" operator (which is already in RPN array).
OpCode eOpCode = (*(pCode - 1))->GetOpCode();
// eOpCode can be some operator which does not change with operands with or contains zero values.
if (eOpCode != ocSum)
return;
FormulaToken** ppTok = pCode - 2; // exclude the root operator.
// The following loop runs till a "pattern" is found or there is a mismatch
// and marks the push DoubleRef arguments as trimmable when there is a match.
// The pattern is
// SUM(IF(<reference|double>=<reference|double>, <then-clause>)<a some operands with operators / or *>)
// such that one of the operands of ocEqual is a double-ref.
// Examples of formula that matches this are:
// SUM(IF(D:D=$A$1,F:F)*$H$1*2.3/$G$2)
// SUM((IF(D:D=$A$1,F:F)*$H$1*2.3/$G$2)*$H$2*5/$G$3)
// SUM(IF(E:E=16,F:F)*$H$1*100)
bool bTillClose = true;
bool bCloseTillIf = false;
sal_Int16 nToksTillIf = 0;
constexpr sal_Int16 MAXDIST_IF = 15;
while (*ppTok)
{
FormulaToken* pTok = *ppTok;
OpCode eCurrOp = pTok->GetOpCode();
++nToksTillIf;
// TODO : Is there a better way to handle this ?
// ocIf is too far off from the sum opcode.
if (nToksTillIf > MAXDIST_IF)
return;
switch (eCurrOp)
{
case ocDiv:
case ocMul:
if (!bTillClose)
return;
break;
case ocPush:
break;
case ocClose:
if (bTillClose)
{
bTillClose = false;
bCloseTillIf = true;
}
else
return;
break;
case ocIf:
{
if (!bCloseTillIf)
return;
if (!pTok->IsInForceArray())
return;
const short nJumpCount = pTok->GetJump()[0];
if (nJumpCount != 2) // Should have THEN but no ELSE.
return;
OpCode eCompOp = (*(ppTok - 1))->GetOpCode();
if (eCompOp != ocEqual)
return;
FormulaToken* pLHS = *(ppTok - 2);
FormulaToken* pRHS = *(ppTok - 3);
if (((pLHS->GetType() == svSingleRef || pLHS->GetType() == svDouble) && pRHS->GetType() == svDoubleRef) ||
((pRHS->GetType() == svSingleRef || pRHS->GetType() == svDouble) && pLHS->GetType() == svDoubleRef))
{
if (pLHS->GetType() == svDoubleRef)
pLHS->GetDoubleRef()->SetTrimToData(true);
else
pRHS->GetDoubleRef()->SetTrimToData(true);
return;
}
}
break;
default:
return;
}
--ppTok;
}
return;
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/interpr5.cxx b/sc/source/core/tool/interpr5.cxx
index 08bd2fc..1461517 100644
--- a/sc/source/core/tool/interpr5.cxx
+++ b/sc/source/core/tool/interpr5.cxx
@@ -324,6 +324,23 @@ ScMatrixRef ScInterpreter::CreateMatrixFromDoubleRef( const FormulaToken* pToken
return nullptr;
}
if (nTab1 == nTab2 && pToken)
{
const ScComplexRefData& rCRef = *pToken->GetDoubleRef();
if (rCRef.IsTrimToData())
{
// Clamp the size of the matrix area to rows which actually contain data.
// For e.g. SUM(IF over an entire column, this can make a big difference,
// But lets not trim the empty space from the top or left as this matters
// at least in matrix formulas involving IF().
// Refer ScCompiler::AnnotateTrimOnDoubleRefs() where double-refs are
// flagged for trimming.
SCCOL nTempCol = nCol1;
SCROW nTempRow = nRow1;
mrDoc.ShrinkToDataArea(nTab1, nTempCol, nTempRow, nCol2, nRow2);
}
}
SCSIZE nMatCols = static_cast<SCSIZE>(nCol2 - nCol1 + 1);
SCSIZE nMatRows = static_cast<SCSIZE>(nRow2 - nRow1 + 1);