Thread the software interpreter

Also introduce new state

ScFormulaVectorState::FormulaVectorEnabledForThreading

to indicate that the “traditional” vectorisation is disabled,
but that threaded calculation should still be tried.

Change-Id: I552d9e29e1ab9e5721534e07f4a45fdd5a23f399
diff --git a/sc/inc/tokenarray.hxx b/sc/inc/tokenarray.hxx
index 294794f..8b352b4 100644
--- a/sc/inc/tokenarray.hxx
+++ b/sc/inc/tokenarray.hxx
@@ -92,6 +92,7 @@ public:
        svl::SharedStringPool& rSPool,
        formula::ExternalReferenceHelper* _pRef) override;
    virtual void CheckToken( const formula::FormulaToken& r ) override;
    void CheckForThreading( OpCode eOp );
    virtual formula::FormulaToken* AddOpCode( OpCode eCode ) override;
    /** ScSingleRefToken with ocPush. */
    formula::FormulaToken* AddSingleReference( const ScSingleRefData& rRef );
diff --git a/sc/inc/types.hxx b/sc/inc/types.hxx
index 51898c2..cc82e36 100644
--- a/sc/inc/types.hxx
+++ b/sc/inc/types.hxx
@@ -59,6 +59,7 @@ enum ScFormulaVectorState

    FormulaVectorEnabled,
    FormulaVectorCheckReference,
    FormulaVectorEnabledForThreading,
    FormulaVectorUnknown
};

diff --git a/sc/source/core/data/formulacell.cxx b/sc/source/core/data/formulacell.cxx
index cf036ad..12cbd14 100644
--- a/sc/source/core/data/formulacell.cxx
+++ b/sc/source/core/data/formulacell.cxx
@@ -4341,7 +4341,9 @@ bool ScFormulaCell::InterpretFormulaGroup()
        return false;
    }

    if (!bThreadingProhibited && !ScCalcConfig::isOpenCLEnabled() && pCode->GetVectorState() != FormulaVectorDisabledNotInSubSet && officecfg::Office::Calc::Formula::Calculation::UseThreadedCalculationForFormulaGroups::get())
    if (!bThreadingProhibited && !ScCalcConfig::isOpenCLEnabled() &&
        pCode->GetVectorState() == FormulaVectorEnabledForThreading &&
        officecfg::Office::Calc::Formula::Calculation::UseThreadedCalculationForFormulaGroups::get())
    {
        // iterate over code in the formula ...
        // ensure all input is pre-calculated -
@@ -4430,26 +4432,30 @@ bool ScFormulaCell::InterpretFormulaGroup()
        return true;
    }

    bool bCanVectorize = false;
    switch (pCode->GetVectorState())
    {
        case FormulaVectorEnabled:
        case FormulaVectorCheckReference:
            // Good.
            bCanVectorize = true; // Good.
        break;

        // Not good.
        case FormulaVectorDisabledByOpCode:
            aScope.addMessage("group calc disabled due to vector state (non-vector-supporting opcode)");
            return false;
            break;
        case FormulaVectorDisabledNotInSoftwareSubset:
            aScope.addMessage("group calc disabled due to vector state (opcode not in software subset)");
            return false;
            break;
        case FormulaVectorDisabledByStackVariable:
            aScope.addMessage("group calc disabled due to vector state (non-vector-supporting stack variable)");
            return false;
            break;
        case FormulaVectorDisabledNotInSubSet:
            aScope.addMessage("group calc disabled due to vector state (opcode not in subset)");
            return false;
            break;
        case FormulaVectorEnabledForThreading:
            aScope.addMessage("group calc disabled due to vector state (wanted to try threading but couldn't)");
            break;
        case FormulaVectorDisabled:
        case FormulaVectorUnknown:
        default:
@@ -4457,6 +4463,9 @@ bool ScFormulaCell::InterpretFormulaGroup()
            return false;
    }

    if (!bCanVectorize)
        return false;

    if (!ScCalcConfig::isOpenCLEnabled() && !ScCalcConfig::isSwInterpreterEnabled())
    {
        aScope.addMessage("opencl not enabled and sw interpreter not enabled");
diff --git a/sc/source/core/tool/formulagroup.cxx b/sc/source/core/tool/formulagroup.cxx
index f637ab5..f0183c6 100644
--- a/sc/source/core/tool/formulagroup.cxx
+++ b/sc/source/core/tool/formulagroup.cxx
@@ -18,9 +18,11 @@
#include <interpre.hxx>
#include <scmatrix.hxx>
#include <globalnames.hxx>
#include <comphelper/threadpool.hxx>

#include <formula/vectortoken.hxx>
#include <officecfg/Office/Common.hxx>
#include <officecfg/Office/Calc.hxx>
#if HAVE_FEATURE_OPENCL
#include <opencl/platforminfo.hxx>
#endif
@@ -145,116 +147,207 @@ ScMatrixRef FormulaGroupInterpreterSoftware::inverseMatrix(const ScMatrix& /*rMa
    return ScMatrixRef();
}

class SoftwareInterpreterFunc
{
public:
    SoftwareInterpreterFunc(ScTokenArray& rCode,
                            ScAddress aBatchTopPos,
                            const ScAddress& rTopPos,
                            ScDocument& rDoc,
                            std::vector<formula::FormulaConstTokenRef>& rRes,
                            SCROW nIndex,
                            SCROW nLastIndex) :
        mrCode(rCode),
        maBatchTopPos(aBatchTopPos),
        mrTopPos(rTopPos),
        mrDoc(rDoc),
        mrResults(rRes),
        mnIdx(nIndex),
        mnLastIdx(nLastIndex)
    {
    }

    void operator() ()
    {
        double fNan;
        rtl::math::setNan(&fNan);
        for (SCROW i = mnIdx; i <= mnLastIdx; ++i, maBatchTopPos.IncRow())
        {
            ScTokenArray aCode2;
            formula::FormulaTokenArrayPlainIterator aIter(mrCode);
            for (const formula::FormulaToken* p = aIter.First(); p; p = aIter.Next())
            {
                switch (p->GetType())
                {
                    case formula::svSingleVectorRef:
                    {
                        const formula::SingleVectorRefToken* p2 = static_cast<const formula::SingleVectorRefToken*>(p);
                        const formula::VectorRefArray& rArray = p2->GetArray();

                        rtl_uString* pStr = nullptr;
                        double fVal = fNan;
                        if (static_cast<size_t>(i) < p2->GetArrayLength())
                        {
                            if (rArray.mpStringArray)
                                // See if the cell is of string type.
                                pStr = rArray.mpStringArray[i];

                            if (!pStr && rArray.mpNumericArray)
                                fVal = rArray.mpNumericArray[i];
                        }

                        if (pStr)
                        {
                            // This is a string cell.
                            svl::SharedStringPool& rPool = mrDoc.GetSharedStringPool();
                            aCode2.AddString(rPool.intern(OUString(pStr)));
                        }
                        else if (rtl::math::isNan(fVal))
                            // Value of NaN represents an empty cell.
                            aCode2.AddToken(ScEmptyCellToken(false, false));
                        else
                            // Numeric cell.
                            aCode2.AddDouble(fVal);
                    }
                    break;
                    case formula::svDoubleVectorRef:
                    {
                        const formula::DoubleVectorRefToken* p2 = static_cast<const formula::DoubleVectorRefToken*>(p);
                        size_t nRowStart = p2->IsStartFixed() ? 0 : i;
                        size_t nRowEnd = p2->GetRefRowSize() - 1;
                        if (!p2->IsEndFixed())
                            nRowEnd += i;

                        assert(nRowStart <= nRowEnd);
                        ScMatrixRef pMat(new ScVectorRefMatrix(p2, nRowStart, nRowEnd - nRowStart + 1));

                        if (p2->IsStartFixed() && p2->IsEndFixed())
                        {
                            // Cached the converted token for absolute range reference.
                            ScComplexRefData aRef;
                            ScRange aRefRange = mrTopPos;
                            aRefRange.aEnd.SetRow(mrTopPos.Row() + nRowEnd);
                            aRef.InitRange(aRefRange);
                            formula::FormulaTokenRef xTok(new ScMatrixRangeToken(pMat, aRef));
                            aCode2.AddToken(*xTok);
                        }
                        else
                        {
                            ScMatrixToken aTok(pMat);
                            aCode2.AddToken(aTok);
                        }
                    }
                    break;
                    default:
                        aCode2.AddToken(*p);
                } // end of switch statement
            } // end of formula token for loop

            ScFormulaCell* pDest = mrDoc.GetFormulaCell(maBatchTopPos);
            if (!pDest)
                return;

            ScCompiler aComp(&mrDoc, maBatchTopPos, aCode2);
            aComp.CompileTokenArray();
            ScInterpreter aInterpreter(pDest, &mrDoc, mrDoc.GetNonThreadedContext(), maBatchTopPos, aCode2);
            aInterpreter.Interpret();
            mrResults[i] = aInterpreter.GetResultToken();
        } // Row iteration for loop end
    } // operator () end

private:
    ScTokenArray& mrCode;
    ScAddress maBatchTopPos;
    const ScAddress& mrTopPos;
    ScDocument& mrDoc;
    std::vector<formula::FormulaConstTokenRef>& mrResults;
    SCROW mnIdx;
    SCROW mnLastIdx;
};

bool FormulaGroupInterpreterSoftware::interpret(ScDocument& rDoc, const ScAddress& rTopPos,
                                                ScFormulaCellGroupRef& xGroup,
                                                ScTokenArray& rCode)
{
    typedef std::unordered_map<const formula::FormulaToken*, formula::FormulaTokenRef> CachedTokensType;

    // Decompose the group into individual cells and calculate them individually.

    // The caller must ensure that the top position is the start position of
    // the group.

    ScAddress aTmpPos = rTopPos;
    std::vector<formula::FormulaConstTokenRef> aResults;
    aResults.reserve(xGroup->mnLength);
    CachedTokensType aCachedTokens;
    std::vector<formula::FormulaConstTokenRef> aResults(xGroup->mnLength);

    double fNan;
    rtl::math::setNan(&fNan);

    for (SCROW i = 0; i < xGroup->mnLength; ++i, aTmpPos.IncRow())
    class Executor : public comphelper::ThreadTask
    {
        ScTokenArray aCode2;
        formula::FormulaTokenArrayPlainIterator aIter(rCode);
        for (const formula::FormulaToken* p = aIter.First(); p; p = aIter.Next())
    public:
        Executor(std::shared_ptr<comphelper::ThreadTaskTag>& rTag,
                 ScTokenArray& rCode2,
                 ScAddress aBatchTopPos,
                 const ScAddress& rTopPos2,
                 ScDocument& rDoc2,
                 std::vector<formula::FormulaConstTokenRef>& rRes,
                 SCROW nIndex,
                 SCROW nLastIndex) :
            comphelper::ThreadTask(rTag),
            maSWIFunc(rCode2, aBatchTopPos, rTopPos2, rDoc2, rRes, nIndex, nLastIndex)
        {
            CachedTokensType::iterator it = aCachedTokens.find(p);
            if (it != aCachedTokens.end())
            {
                // This token is cached. Use the cached one.
                aCode2.AddToken(*it->second);
                continue;
            }

            switch (p->GetType())
            {
                case formula::svSingleVectorRef:
                {
                    const formula::SingleVectorRefToken* p2 = static_cast<const formula::SingleVectorRefToken*>(p);
                    const formula::VectorRefArray& rArray = p2->GetArray();

                    rtl_uString* pStr = nullptr;
                    double fVal = fNan;
                    if (static_cast<size_t>(i) < p2->GetArrayLength())
                    {
                        if (rArray.mpStringArray)
                            // See if the cell is of string type.
                            pStr = rArray.mpStringArray[i];

                        if (!pStr && rArray.mpNumericArray)
                            fVal = rArray.mpNumericArray[i];
                    }

                    if (pStr)
                    {
                        // This is a string cell.
                        svl::SharedStringPool& rPool = rDoc.GetSharedStringPool();
                        aCode2.AddString(rPool.intern(OUString(pStr)));
                    }
                    else if (rtl::math::isNan(fVal))
                        // Value of NaN represents an empty cell.
                        aCode2.AddToken(ScEmptyCellToken(false, false));
                    else
                        // Numeric cell.
                        aCode2.AddDouble(fVal);
                }
                break;
                case formula::svDoubleVectorRef:
                {
                    const formula::DoubleVectorRefToken* p2 = static_cast<const formula::DoubleVectorRefToken*>(p);
                    size_t nRowStart = p2->IsStartFixed() ? 0 : i;
                    size_t nRowEnd = p2->GetRefRowSize() - 1;
                    if (!p2->IsEndFixed())
                        nRowEnd += i;

                    assert(nRowStart <= nRowEnd);
                    ScMatrixRef pMat(new ScVectorRefMatrix(p2, nRowStart, nRowEnd - nRowStart + 1));

                    if (p2->IsStartFixed() && p2->IsEndFixed())
                    {
                        // Cached the converted token for absolute range reference.
                        ScComplexRefData aRef;
                        ScRange aRefRange = rTopPos;
                        aRefRange.aEnd.SetRow(rTopPos.Row() + nRowEnd);
                        aRef.InitRange(aRefRange);
                        formula::FormulaTokenRef xTok(new ScMatrixRangeToken(pMat, aRef));
                        aCachedTokens.emplace(p, xTok);
                        aCode2.AddToken(*xTok);
                    }
                    else
                    {
                        ScMatrixToken aTok(pMat);
                        aCode2.AddToken(aTok);
                    }
                }
                break;
                default:
                    aCode2.AddToken(*p);
            }
        }
        virtual void doWork() override
        {
            maSWIFunc();
        }

        ScFormulaCell* pDest = rDoc.GetFormulaCell(aTmpPos);
        if (!pDest)
            return false;
    private:
        SoftwareInterpreterFunc maSWIFunc;
    };

        ScCompiler aComp(&rDoc, aTmpPos, aCode2);
        aComp.CompileTokenArray();
        ScInterpreter aInterpreter(pDest, &rDoc, rDoc.GetNonThreadedContext(), aTmpPos, aCode2);
        aInterpreter.Interpret();
        aResults.push_back(aInterpreter.GetResultToken());
    } // for loop end (xGroup->mnLength)
    static const bool bThreadingProhibited = std::getenv("SC_NO_THREADED_CALCULATION");

    bool bUseThreading = !bThreadingProhibited && officecfg::Office::Calc::Formula::Calculation::UseThreadedCalculationForFormulaGroups::get();

    if (bUseThreading)
    {
        comphelper::ThreadPool& rThreadPool(comphelper::ThreadPool::getSharedOptimalPool());
        sal_Int32 nThreadCount = rThreadPool.getWorkerCount();

        SCROW nLen = xGroup->mnLength;
        SCROW nBatchSize = nLen / nThreadCount;
        if (nLen < nThreadCount)
        {
            nBatchSize = 1;
            nThreadCount = nLen;
        }
        SCROW nRemaining = nLen - nBatchSize * nThreadCount;

        SAL_INFO("sc.threaded", "Running " << nThreadCount << " threads");

        SCROW nLeft = nLen;
        SCROW nStart = 0;
        std::shared_ptr<comphelper::ThreadTaskTag> aTag = comphelper::ThreadPool::createThreadTaskTag();
        while (nLeft > 0)
        {
            SCROW nCount = std::min(nLeft, nBatchSize) + (nRemaining ? 1 : 0);
            if ( nRemaining )
                --nRemaining;
            SCROW nLast = nStart + nCount - 1;
            rThreadPool.pushTask(new Executor(aTag, rCode, aTmpPos, rTopPos, rDoc, aResults, nStart, nLast));
            aTmpPos.IncRow(nCount);
            nLeft -= nCount;
            nStart = nLast + 1;
        }
        SAL_INFO("sc.threaded", "Joining threads");
        rThreadPool.waitUntilDone(aTag);
        SAL_INFO("sc.threaded", "Done");
    }
    else
    {
        SoftwareInterpreterFunc aSWIFunc(rCode, aTmpPos, rTopPos, rDoc, aResults, 0, xGroup->mnLength - 1);
        aSWIFunc();
    }

    for (SCROW i = 0; i < xGroup->mnLength; ++i)
        if (!aResults[i].get())
            return false;

    if (!aResults.empty())
        rDoc.SetFormulaResults(rTopPos, &aResults[0], aResults.size());
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx
index 5349e2d..2ded452 100644
--- a/sc/source/core/tool/token.cxx
+++ b/sc/source/core/tool/token.cxx
@@ -1336,7 +1336,7 @@ bool ScTokenArray::AddFormulaToken(
    return bError;
}

void ScTokenArray::CheckToken( const FormulaToken& r )
void ScTokenArray::CheckForThreading( OpCode eOp  )
{
    static const std::set<OpCode> aThreadedCalcBlackList({
        ocIndirect,
@@ -1345,23 +1345,32 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
        ocTableOp
    });

    if (IsFormulaVectorDisabled())
        // It's already disabled.  No more checking needed.
        return;
    // We only call this if it was already disabled
    assert(IsFormulaVectorDisabled());

    static const bool bThreadingProhibited = std::getenv("SC_NO_THREADED_CALCULATION");

    OpCode eOp = r.GetOpCode();

    if (!bThreadingProhibited && !ScCalcConfig::isOpenCLEnabled() && officecfg::Office::Calc::Formula::Calculation::UseThreadedCalculationForFormulaGroups::get())
    {
        if (aThreadedCalcBlackList.count(eOp))
        {
            meVectorState = FormulaVectorDisabledNotInSubSet;
            SAL_INFO("sc.core.formulagroup", "opcode " << formula::FormulaCompiler().GetOpCodeMap(sheet::FormulaLanguage::ENGLISH)->getSymbol(eOp) << " disables threaded calculation of formula group");
        }
        return;
        else
        {
            SAL_INFO("sc.core.formulagroup", "but enabling for threading instead");
            meVectorState = FormulaVectorEnabledForThreading;
        }
    }
}

void ScTokenArray::CheckToken( const FormulaToken& r )
{
    if (IsFormulaVectorDisabled())
        // It's already disabled.  No more checking needed.
        return;

    OpCode eOp = r.GetOpCode();

    if (SC_OPCODE_START_FUNCTION <= eOp && eOp < SC_OPCODE_STOP_FUNCTION)
    {
@@ -1370,6 +1379,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
        {
            SAL_INFO("sc.opencl", "opcode " << formula::FormulaCompiler().GetOpCodeMap(sheet::FormulaLanguage::ENGLISH)->getSymbol(eOp) << " disables vectorisation for formula group");
            meVectorState = FormulaVectorDisabledNotInSubSet;
            CheckForThreading(eOp);
            return;
        }

@@ -1381,6 +1391,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
        {
            SAL_INFO("sc.core.formulagroup", "opcode " << formula::FormulaCompiler().GetOpCodeMap(sheet::FormulaLanguage::ENGLISH)->getSymbol(eOp) << " disables S/W interpreter for formula group");
            meVectorState = FormulaVectorDisabledNotInSoftwareSubset;
            CheckForThreading(eOp);
            return;
        }

@@ -1608,6 +1619,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
                // We don't support vectorization on these.
                SAL_INFO("sc.opencl", "opcode ocPush: variable type " << StackVarEnumToString(r.GetType()) << " disables vectorisation for formula group");
                meVectorState = FormulaVectorDisabledByStackVariable;
                CheckForThreading(eOp);
            break;
            default:
                ;
@@ -1619,6 +1631,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
    {
        SAL_INFO("sc.opencl", "opcode " << formula::FormulaCompiler().GetOpCodeMap(sheet::FormulaLanguage::ENGLISH)->getSymbol(eOp) << " disables vectorisation for formula group");
        meVectorState = FormulaVectorDisabledNotInSubSet;
        CheckForThreading(eOp);
    }
    // only when openCL interpreter is not enabled - the assumption is that
    // the S/W interpreter blacklist is more strict
@@ -1629,6 +1642,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
    {
        SAL_INFO("sc.core.formulagroup", "opcode " << formula::FormulaCompiler().GetOpCodeMap(sheet::FormulaLanguage::ENGLISH)->getSymbol(eOp) << " disables S/W interpreter for formula group");
        meVectorState = FormulaVectorDisabledNotInSoftwareSubset;
        CheckForThreading(eOp);
    }
}

@@ -1756,6 +1770,7 @@ bool ScTokenArray::IsFormulaVectorDisabled() const
        case FormulaVectorDisabledNotInSoftwareSubset:
        case FormulaVectorDisabledByStackVariable:
        case FormulaVectorDisabledNotInSubSet:
        case FormulaVectorEnabledForThreading:
            return true;
        default:
            ;