tdf#133858 reduce the double-ref range to data content

in certain matrix formulas like SUM(IF(A1=G:G, H:H)*B1/B2) where whole
columns are used for comparison in the condition of IF ultimately
followed by a reducer like SUM. In such cases we can safely reduce the
double-refs involved in the comparison to the sheet area where there is
data before converting the data to ScMatrix.

This is a more restricted version of Noel's fix in
37ffe509ef011357123642577c04ff296d59ce68

Change-Id: I1c2e8985adedb3f4c4648f541fb0e8e7d0fae033
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109050
Reviewed-by: Luboš Luňák <l.lunak@collabora.com>
Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com>
(cherry picked from commit 65167a9265acfea04733b5ff6ee3220a9da624f4)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109118
Tested-by: Jenkins
Reviewed-by: Dennis Francis <dennis.francis@collabora.com>
(cherry picked from commit b14107dd0eaf9bfc276544e1900873d36075425e)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/109290
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index bd1d1f0..73669e8 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -1685,18 +1685,25 @@ void FormulaCompiler::Factor()
                    HandleIIOpCode(pFacToken, pArgArray,
                                   std::min(nSepCount, static_cast<sal_uInt32>(FORMULA_MAXPARAMSII)));
            }
            bool bDone = false;
            if (bBadName)
                ;   // nothing, keep current token for return
            else if (eOp != ocClose)
                SetError( FormulaError::PairExpected);
            else
            {
                NextToken();
                bDone = true;
            }
            // Jumps are just normal functions for the FunctionAutoPilot tree view
            if (!mbJumpCommandReorder && pFacToken->GetType() == svJump)
                pFacToken = new FormulaFAPToken( pFacToken->GetOpCode(), nSepCount, pFacToken );
            else
                pFacToken->SetByte( nSepCount );
            PutCode( pFacToken );

            if (bDone)
                AnnotateOperands();
        }
        else if (IsOpCodeJumpCommand(eOp))
        {
diff --git a/include/formula/FormulaCompiler.hxx b/include/formula/FormulaCompiler.hxx
index 9fdd5a5..c455ca4 100644
--- a/include/formula/FormulaCompiler.hxx
+++ b/include/formula/FormulaCompiler.hxx
@@ -334,6 +334,8 @@ protected:
    // Called from CompileTokenArray() after RPN code generation is done.
    virtual void PostProcessCode() {}

    virtual void AnnotateOperands() {}

    OUString            aCorrectedFormula;      // autocorrected Formula
    OUString            aCorrectedSymbol;       // autocorrected Symbol

diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index 5299cf5..b051f3a 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -514,10 +514,12 @@ private:
    bool HandleIIOpCodeInternal(formula::FormulaToken* token, formula::FormulaToken*** pppToken, sal_uInt8 nNumParams);
    bool SkipImplicitIntersectionOptimization(const formula::FormulaToken* token) const;
    virtual void PostProcessCode() override;
    virtual void AnnotateOperands() override;
    static bool ParameterMayBeImplicitIntersection(const formula::FormulaToken* token, int parameter);
    void ReplaceDoubleRefII(formula::FormulaToken** ppDoubleRefTok);
    bool AdjustSumRangeShape(const ScComplexRefData& rBaseRange, ScComplexRefData& rSumRange);
    void CorrectSumRange(const ScComplexRefData& rBaseRange, ScComplexRefData& rSumRange, formula::FormulaToken** ppSumRangeToken);
    void AnnotateTrimOnDoubleRefs();
};

#endif
diff --git a/sc/inc/refdata.hxx b/sc/inc/refdata.hxx
index a2c9313..9b57fe8 100644
--- a/sc/inc/refdata.hxx
+++ b/sc/inc/refdata.hxx
@@ -124,6 +124,11 @@ struct ScComplexRefData
{
    ScSingleRefData Ref1;
    ScSingleRefData Ref2;
    bool bTrimToData;

    ScComplexRefData() :
        bTrimToData(false)
    {}

    void InitFlags()
        { Ref1.InitFlags(); Ref2.InitFlags(); }
@@ -198,6 +203,9 @@ struct ScComplexRefData

    bool IsDeleted() const;

    bool IsTrimToData() const { return bTrimToData; }
    void SetTrimToData(bool bSet) { bTrimToData = bSet; }

#if DEBUG_FORMULA_COMPILER
    void Dump( int nIndent = 0 ) const;
#endif
diff --git a/sc/qa/unit/ucalc.hxx b/sc/qa/unit/ucalc.hxx
index 40ef0d2..a49a26d 100644
--- a/sc/qa/unit/ucalc.hxx
+++ b/sc/qa/unit/ucalc.hxx
@@ -159,6 +159,7 @@ public:
    void testFormulaCompilerImplicitIntersection1ParamWithChange();
    void testFormulaCompilerImplicitIntersection1NoGroup();
    void testFormulaCompilerImplicitIntersectionOperators();
    void testFormulaAnnotateTrimOnDoubleRefs();
    void testFormulaRefUpdate();
    void testFormulaRefUpdateRange();
    void testFormulaRefUpdateSheets();
@@ -607,6 +608,7 @@ public:
    CPPUNIT_TEST(testFormulaCompilerImplicitIntersection1ParamWithChange);
    CPPUNIT_TEST(testFormulaCompilerImplicitIntersection1NoGroup);
    CPPUNIT_TEST(testFormulaCompilerImplicitIntersectionOperators);
    CPPUNIT_TEST(testFormulaAnnotateTrimOnDoubleRefs);
    CPPUNIT_TEST(testFormulaRefUpdate);
    CPPUNIT_TEST(testFormulaRefUpdateRange);
    CPPUNIT_TEST(testFormulaRefUpdateSheets);
diff --git a/sc/qa/unit/ucalc_formula.cxx b/sc/qa/unit/ucalc_formula.cxx
index 16258b1..1f55524 100644
--- a/sc/qa/unit/ucalc_formula.cxx
+++ b/sc/qa/unit/ucalc_formula.cxx
@@ -1432,6 +1432,115 @@ void Test::testFormulaCompilerImplicitIntersectionOperators()
    m_pDoc->DeleteTab(0);
}

void Test::testFormulaAnnotateTrimOnDoubleRefs()
{
    m_pDoc->InsertTab(0, "Test");
    sc::AutoCalcSwitch aACSwitch(*m_pDoc, true); // turn auto calc on.

    constexpr sal_Int32 nCols = 2;
    constexpr sal_Int32 nRows = 5;

    // Values in A1:B5
    constexpr sal_Int32 aMat[nRows][nCols] = {
        {4, 50},
        {5, 30},
        {4, 40},
        {0, 70},
        {5, 90}
    };

    for (sal_Int32 nCol = 0; nCol < nCols; ++nCol)
    {
        for (sal_Int32 nRow = 0; nRow < nRows; ++nRow)
            m_pDoc->SetValue(nCol, nRow, 0, aMat[nRow][nCol]);
    }

    m_pDoc->SetValue(2, 0, 0, 4); // C1 = 4
    m_pDoc->SetValue(3, 0, 0, 5); // D1 = 5

    ScMarkData aMark(m_pDoc->GetSheetLimits());
    aMark.SelectOneTable(0);

    struct TestCase
    {
        OUString aFormula;
        ScRange aTrimmableRange;
        double fResult;
        bool bMatrixFormula;
    };

    constexpr sal_Int32 nTestCases = 5;
    TestCase aTestCases[nTestCases] = {
        {
            "=SUM(IF($C$1=A:A;B:B)/10*D1)",
            ScRange(0, 0, 0, 0, 1048575, 0),
            45.0,
            true
        },

        {
            "=SUM(IF(A:A=5;B:B)/10*D1)",
            ScRange(0, 0, 0, 0, 1048575, 0),
            60.0,
            true
        },

        {
            "=SUM(IF($C$1=A:A;B:B;B:B)/10*D1)",  // IF has else clause
            ScRange(-1, -1, -1, -1, -1, -1),     // Has no trimmable double-ref.
            140.0,
            true
        },

        {
            "=SUM(IF($C$1=A:A;B:B)/10*D1)",
            ScRange(-1, -1, -1, -1, -1, -1),     // Has no trimmable double-ref.
            25,
            false                                // Not in matrix mode.
        },

        {
            "=SUMPRODUCT(A:A=$C$1; 1-(A:A=$C$1))",
            ScRange(-1, -1, -1, -1, -1, -1),     // Has no trimmable double-ref.
            0.0,
            false                                // Not in matrix mode.
        },
    };

    for (sal_Int32 nTestIdx = 0; nTestIdx < nTestCases; ++nTestIdx)
    {
        TestCase& rTestCase = aTestCases[nTestIdx];
        if (rTestCase.bMatrixFormula)
            m_pDoc->InsertMatrixFormula(4, 0, 4, 0, aMark, rTestCase.aFormula); // Formula in E1
        else
            m_pDoc->SetString(ScAddress(4, 0, 0), rTestCase.aFormula);          // Formula in E1

        std::string aMsgStart = "TestCase#" + std::to_string(nTestIdx + 1) + " : ";
        CPPUNIT_ASSERT_EQUAL_MESSAGE(aMsgStart + "Incorrect formula result", rTestCase.fResult, m_pDoc->GetValue(ScAddress(4, 0, 0)));

        ScTokenArray* pCode = getTokens(*m_pDoc, ScAddress(4, 0, 0));
        sal_Int32 nLen = pCode->GetCodeLen();
        FormulaToken** pRPNArray = pCode->GetCode();

        for (sal_Int32 nIdx = 0; nIdx < nLen; ++nIdx)
        {
            FormulaToken* pTok = pRPNArray[nIdx];
            if (pTok && pTok->GetType() == svDoubleRef)
            {
                ScRange aRange = pTok->GetDoubleRef()->toAbs(*m_pDoc, ScAddress(4, 0, 0));
                if (aRange == rTestCase.aTrimmableRange)
                    CPPUNIT_ASSERT_MESSAGE(aMsgStart + "Double ref is incorrectly flagged as not trimmable to data",
                        pTok->GetDoubleRef()->IsTrimToData());
                else
                    CPPUNIT_ASSERT_MESSAGE(aMsgStart + "Double ref is incorrectly flagged as trimmable to data",
                        !pTok->GetDoubleRef()->IsTrimToData());
            }
        }
    }

    m_pDoc->DeleteTab(0);
}

void Test::testFormulaRefUpdate()
{
    m_pDoc->InsertTab(0, "Formula");
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index f4e100c..eb87065 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -6136,6 +6136,11 @@ void ScCompiler::PostProcessCode()
    mPendingImplicitIntersectionOptimizations.clear();
}

void ScCompiler::AnnotateOperands()
{
    AnnotateTrimOnDoubleRefs();
}

void ScCompiler::ReplaceDoubleRefII(FormulaToken** ppDoubleRefTok)
{
    const ScComplexRefData* pRange = (*ppDoubleRefTok)->GetDoubleRef();
@@ -6307,4 +6312,97 @@ void ScCompiler::CorrectSumRange(const ScComplexRefData& rBaseRange,
    pNewSumRangeTok->IncRef();
}

void ScCompiler::AnnotateTrimOnDoubleRefs()
{
    if (!pCode || !(pCode -1) || !(*(pCode - 1)))
        return;

    // OpCode of the "root" operator (which is already in RPN array).
    OpCode eOpCode = (*(pCode - 1))->GetOpCode();
    // eOpCode can be some operator which does not change with operands with or contains zero values.
    if (eOpCode != ocSum)
        return;

    FormulaToken** ppTok = pCode - 2; // exclude the root operator.
    // The following loop runs till a "pattern" is found or there is a mismatch
    // and marks the push DoubleRef arguments as trimmable when there is a match.
    // The pattern is
    // SUM(IF(<reference|double>=<reference|double>, <then-clause>)<a some operands with operators / or *>)
    // such that one of the operands of ocEqual is a double-ref.
    // Examples of formula that matches this are:
    //   SUM(IF(D:D=$A$1,F:F)*$H$1*2.3/$G$2)
    //   SUM((IF(D:D=$A$1,F:F)*$H$1*2.3/$G$2)*$H$2*5/$G$3)
    //   SUM(IF(E:E=16,F:F)*$H$1*100)
    bool bTillClose = true;
    bool bCloseTillIf = false;
    sal_Int16 nToksTillIf = 0;
    constexpr sal_Int16 MAXDIST_IF = 15;
    while (*ppTok)
    {
        FormulaToken* pTok = *ppTok;
        OpCode eCurrOp = pTok->GetOpCode();
        ++nToksTillIf;

        // TODO : Is there a better way to handle this ?
        // ocIf is too far off from the sum opcode.
        if (nToksTillIf > MAXDIST_IF)
            return;

        switch (eCurrOp)
        {
            case ocDiv:
            case ocMul:
                if (!bTillClose)
                    return;
                break;
            case ocPush:

                break;
            case ocClose:
                if (bTillClose)
                {
                    bTillClose = false;
                    bCloseTillIf = true;
                }
                else
                    return;
                break;
            case ocIf:
                {
                    if (!bCloseTillIf)
                        return;

                    if (!pTok->IsInForceArray())
                        return;

                    const short nJumpCount = pTok->GetJump()[0];
                    if (nJumpCount != 2) // Should have THEN but no ELSE.
                        return;

                    OpCode eCompOp = (*(ppTok - 1))->GetOpCode();
                    if (eCompOp != ocEqual)
                        return;

                    FormulaToken* pLHS = *(ppTok - 2);
                    FormulaToken* pRHS = *(ppTok - 3);
                    if (((pLHS->GetType() == svSingleRef || pLHS->GetType() == svDouble) && pRHS->GetType() == svDoubleRef) ||
                        ((pRHS->GetType() == svSingleRef || pRHS->GetType() == svDouble) && pLHS->GetType() == svDoubleRef))
                    {
                        if (pLHS->GetType() == svDoubleRef)
                            pLHS->GetDoubleRef()->SetTrimToData(true);
                        else
                            pRHS->GetDoubleRef()->SetTrimToData(true);
                        return;
                    }
                }
                break;
            default:
                return;
        }
        --ppTok;
    }

    return;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/core/tool/interpr5.cxx b/sc/source/core/tool/interpr5.cxx
index 3930de3..2ea4eef 100644
--- a/sc/source/core/tool/interpr5.cxx
+++ b/sc/source/core/tool/interpr5.cxx
@@ -325,6 +325,23 @@ ScMatrixRef ScInterpreter::CreateMatrixFromDoubleRef( const FormulaToken* pToken
        return nullptr;
    }

    if (nTab1 == nTab2 && pToken)
    {
        const ScComplexRefData& rCRef = *pToken->GetDoubleRef();
        if (rCRef.IsTrimToData())
        {
            // Clamp the size of the matrix area to rows which actually contain data.
            // For e.g. SUM(IF over an entire column, this can make a big difference,
            // But lets not trim the empty space from the top or left as this matters
            // at least in matrix formulas involving IF().
            // Refer ScCompiler::AnnotateTrimOnDoubleRefs() where double-refs are
            // flagged for trimming.
            SCCOL nTempCol = nCol1;
            SCROW nTempRow = nRow1;
            mrDoc.ShrinkToDataArea(nTab1, nTempCol, nTempRow, nCol2, nRow2);
        }
    }

    SCSIZE nMatCols = static_cast<SCSIZE>(nCol2 - nCol1 + 1);
    SCSIZE nMatRows = static_cast<SCSIZE>(nRow2 - nRow1 + 1);