Resolves: tdf#137091 Use CharClass matching the formula language

 This is a combination of 3 commits.

Resolves: tdf#137091 Use CharClass matching the formula language

... not the current locale. Specifically important for
uppercase/lowercase conversions that may yield different results
for example in Turkish i with/without dot.

I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103588
Tested-by: Jenkins
Reviewed-by: Eike Rathke <erack@redhat.com>
(cherry picked from commit 3c6177be2705303044e3de262689d593f3d0f282)
Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org>

Current system locale's CharClass for user defined names, tdf#137091 follow-up

I5f025a12ca183acb3f80d2a7527677aceb9ffbd5
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103593
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
(cherry picked from commit d41c45a522c5e973d7043d36bc6c82e77735ab9b)

Determine CharClass difference once, tdf#137091 follow-up

As a side note:
Clang plugin simplifybool for
!(rLT1.getLanguage() == "en" && rLT2.getLanguage() == "en")
told "error: logical negation of logical op containing negation, can be simplified"
which is nonsense (the message stayed the same while the checks evolved).
It actually complained about !(a==b && c==d) to be rewritten as
(a!=b || c!=d) whether that makes sense or not.. it may save one
boolean operation, yes, but..

Ib478d46d7ff926c1c9f65fec059c7a3f31fa7ce3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103601
Tested-by: Jenkins
Reviewed-by: Eike Rathke <erack@redhat.com>
(cherry picked from commit 1acf517906b7cdc4931dd26319d467dff53ae7d2)

 Conflicts:
	sc/source/core/tool/compiler.cxx

Change-Id: I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103598
Tested-by: Jenkins
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index e6a224f..e969ecb 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -29,6 +29,9 @@

#include <svl/zforlist.hxx>
#include <unotools/charclass.hxx>
#include <vcl/svapp.hxx>
#include <vcl/settings.hxx>
#include <comphelper/processfactory.hxx>
#include <com/sun/star/sheet/FormulaOpCodeMapEntry.hpp>
#include <com/sun/star/sheet/FormulaMapGroup.hpp>
#include <com/sun/star/sheet/FormulaMapGroupSpecialOffset.hpp>
@@ -140,6 +143,14 @@ void lclPushOpCodeMapEntries( ::std::vector< sheet::FormulaOpCodeMapEntry >& rVe
        lclPushOpCodeMapEntry( rVec, pTable, *pnOpCodes );
}

/** Creates a CharClass for the current UI language unless that language is
    English.

    @return A newly allocated CharClass constructed from the UI language tag,
            or nullptr if the UI language is "en". The instance is created
            with new, so the caller is responsible for deleting it.
 */
CharClass* createCharClassIfNonEnglishUI()
{
    const LanguageTag& rUILanguageTag( Application::GetSettings().GetUILanguageTag());
    // Only a non-English UI language gets its own CharClass instance.
    if (rUILanguageTag.getLanguage() != "en")
        return new CharClass( ::comphelper::getProcessComponentContext(), rUILanguageTag);
    return nullptr;
}

class OpCodeList
{
public:
@@ -163,8 +174,8 @@ OpCodeList::OpCodeList(bool bLocalized, const std::pair<const char*, int>* pSymb
    , mpSymbols(pSymbols)
    , mbLocalized(bLocalized)
{
    SvtSysLocale aSysLocale;
    const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : aSysLocale.GetCharClassPtr());
    std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : createCharClassIfNonEnglishUI());
    const CharClass* pCharClass = xCharClass.get();
    if (meSepType == FormulaCompiler::SeparatorType::RESOURCE_BASE)
    {
        for (sal_uInt16 i = 0; i <= SC_OPCODE_LAST_OPCODE_ID; ++i)
@@ -809,8 +820,8 @@ FormulaCompiler::OpCodeMapPtr FormulaCompiler::CreateOpCodeMap(
    NonConstOpCodeMapPtr xMap = std::make_shared<OpCodeMap>( SC_OPCODE_LAST_OPCODE_ID + 1, false,
                FormulaGrammar::mergeToGrammar( FormulaGrammar::setEnglishBit(
                        FormulaGrammar::GRAM_EXTERNAL, bEnglish), FormulaGrammar::CONV_UNSPECIFIED));
    SvtSysLocale aSysLocale;
    const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : aSysLocale.GetCharClassPtr());
    std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : createCharClassIfNonEnglishUI());
    const CharClass* pCharClass = xCharClass.get();
    for (auto const& rMapEntry : rMapping)
    {
        OpCode eOp = OpCode(rMapEntry.Token.OpCode);
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index 91933ae..a428721 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -254,7 +254,8 @@ public:

private:

    static CharClass            *pCharClassEnglish;                      // character classification for en_US locale
    static const CharClass      *pCharClassEnglish;     // character classification for en_US locale
    static const CharClass      *pCharClassLocalized;   // character classification for UI locale
    static const Convention     *pConventions[ formula::FormulaGrammar::CONV_LAST ];

    static const struct AddInMap
@@ -285,7 +286,8 @@ private:

    std::queue<OpCode> maPendingOpCodes; // additional opcodes generated from a single symbol

    const CharClass*    pCharClass;         // which character classification is used for parseAnyToken
    const CharClass* pCharClass; // which character classification is used for parseAnyToken and upper/lower
    bool        mbCharClassesDiffer;    // whether pCharClass and current system locale's CharClass differ
    sal_uInt16      mnPredetectedReference;     // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
    sal_Int32   mnRangeOpPosInSymbol;       // if and where a range operator is in symbol
    const Convention *pConv;
@@ -322,6 +324,7 @@ private:
#endif

    bool   NextNewToken(bool bInArray);
    bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const;

    virtual void SetError(FormulaError nError) override;
    sal_Int32 NextSymbol(bool bInArray);
@@ -352,7 +355,8 @@ private:
     */
    ScRangeData* GetRangeData( const formula::FormulaToken& pToken ) const;

    static void InitCharClassEnglish();
    static const CharClass* GetCharClassEnglish();
    static const CharClass* GetCharClassLocalized();

public:
    ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos,
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index eeb8beb..fea9d46 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -22,6 +22,7 @@
#include <compiler.hxx>

#include <vcl/svapp.hxx>
#include <vcl/settings.hxx>
#include <sfx2/app.hxx>
#include <sfx2/objsh.hxx>
#include <basic/sbmeth.hxx>
@@ -79,7 +80,8 @@ using namespace formula;
using namespace ::com::sun::star;
using ::std::vector;

CharClass*                          ScCompiler::pCharClassEnglish = nullptr;
const CharClass*                    ScCompiler::pCharClassEnglish = nullptr;
const CharClass*                    ScCompiler::pCharClassLocalized = nullptr;
const ScCompiler::Convention*       ScCompiler::pConventions[ ]   = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };

namespace {
@@ -173,12 +175,17 @@ void ScCompiler::DeInit()
        delete pCharClassEnglish;
        pCharClassEnglish = nullptr;
    }
    if (pCharClassLocalized)
    {
        delete pCharClassLocalized;
        pCharClassLocalized = nullptr;
    }
}

bool ScCompiler::IsEnglishSymbol( const OUString& rName )
{
    // function names are always case-insensitive
    OUString aUpper = ScGlobal::getCharClassPtr()->uppercase(rName);
    OUString aUpper = GetCharClassEnglish()->uppercase(rName);

    // 1. built-in function name
    OpCode eOp = ScCompiler::GetEnglishOpCode( aUpper );
@@ -197,11 +204,27 @@ bool ScCompiler::IsEnglishSymbol( const OUString& rName )
    return !aIntName.isEmpty();       // no valid function name
}

void ScCompiler::InitCharClassEnglish()
const CharClass* ScCompiler::GetCharClassEnglish()
{
    css::lang::Locale aLocale( "en", "US", "");
    pCharClassEnglish = new CharClass(
            ::comphelper::getProcessComponentContext(), LanguageTag( aLocale));
    if (!pCharClassEnglish)
    {
        css::lang::Locale aLocale( "en", "US", "");
        pCharClassEnglish = new CharClass(
                ::comphelper::getProcessComponentContext(), LanguageTag( aLocale));
    }
    return pCharClassEnglish;
}

/** Returns the CharClass for the UI language, creating it on first use.

    The instance is stored in the static pCharClassLocalized and reused by
    subsequent calls.

    @return Pointer to the CharClass stored in pCharClassLocalized.
 */
const CharClass* ScCompiler::GetCharClassLocalized()
{
    // Already created by an earlier call, reuse it.
    if (pCharClassLocalized)
        return pCharClassLocalized;

    // Switching UI language requires restart; if not, we would have to
    // keep track of that.
    const LanguageTag& rUILanguageTag = Application::GetSettings().GetUILanguageTag();
    pCharClassLocalized = new CharClass( ::comphelper::getProcessComponentContext(), rUILanguageTag);
    return pCharClassLocalized;
}

void ScCompiler::SetGrammar( const FormulaGrammar::Grammar eGrammar )
@@ -266,13 +289,19 @@ void ScCompiler::SetFormulaLanguage( const ScCompiler::OpCodeMapPtr & xMap )
    {
        mxSymbols = xMap;
        if (mxSymbols->isEnglish())
        {
            if (!pCharClassEnglish)
                InitCharClassEnglish();
            pCharClass = pCharClassEnglish;
        }
            pCharClass = GetCharClassEnglish();
        else
            pCharClass = ScGlobal::getCharClassPtr();
            pCharClass = GetCharClassLocalized();

        // The difference is needed for an uppercase() call that usually does not
        // result in different strings but for a few languages like Turkish;
        // though even de-DE and de-CH may differ in ß/SS handling..
        // At least don't care if both are English.
        // The current locale is more likely to not be "en" so check first.
        const LanguageTag& rLT1 = ScGlobal::getCharClassPtr()->getLanguageTag();
        const LanguageTag& rLT2 = pCharClass->getLanguageTag();
        mbCharClassesDiffer = (rLT1 != rLT2 && (rLT1.getLanguage() != "en" || rLT2.getLanguage() != "en"));

        SetGrammarAndRefConvention( mxSymbols->getGrammar(), GetGrammar());
    }
}
@@ -1817,6 +1846,7 @@ ScCompiler::ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos, 
    mnCurrentSheetTab(-1),
    mnCurrentSheetEndPos(0),
    pCharClass(ScGlobal::getCharClassPtr()),
    mbCharClassesDiffer(false),
    mnPredetectedReference(0),
    mnRangeOpPosInSymbol(-1),
    pConv(GetRefConvention(FormulaGrammar::CONV_OOO)),
@@ -1840,6 +1870,7 @@ ScCompiler::ScCompiler( ScDocument* pDocument, const ScAddress& rPos, ScTokenArr
        mnCurrentSheetEndPos(0),
        nSrcPos(0),
        pCharClass( ScGlobal::getCharClassPtr() ),
        mbCharClassesDiffer(false),
        mnPredetectedReference(0),
        mnRangeOpPosInSymbol(-1),
        pConv( GetRefConvention( FormulaGrammar::CONV_OOO ) ),
@@ -1862,6 +1893,7 @@ ScCompiler::ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos,
    mnCurrentSheetTab(-1),
    mnCurrentSheetEndPos(0),
    pCharClass(ScGlobal::getCharClassPtr()),
    mbCharClassesDiffer(false),
    mnPredetectedReference(0),
    mnRangeOpPosInSymbol(-1),
    pConv(GetRefConvention(FormulaGrammar::CONV_OOO)),
@@ -1885,6 +1917,7 @@ ScCompiler::ScCompiler( ScDocument* pDocument, const ScAddress& rPos,
        mnCurrentSheetEndPos(0),
        nSrcPos(0),
        pCharClass( ScGlobal::getCharClassPtr() ),
        mbCharClassesDiffer(false),
        mnPredetectedReference(0),
        mnRangeOpPosInSymbol(-1),
        pConv( GetRefConvention( FormulaGrammar::CONV_OOO ) ),
@@ -4180,9 +4213,9 @@ void ScCompiler::AutoCorrectParsedSymbol()
    }
}

static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, FormulaGrammar::Grammar eGrammar )
bool ScCompiler::ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const
{
    if (FormulaGrammar::isODFF( eGrammar ))
    if (FormulaGrammar::isODFF( meGrammar ))
    {
        // ODFF has a defined set of English function names, avoid i18n
        // overhead.
@@ -4191,7 +4224,8 @@ static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, Formul
    }
    else
    {
        rUpper = ScGlobal::getCharClassPtr()->uppercase(rOrg);
        // One of localized or English.
        rUpper = pCharClass->uppercase(rOrg);
        return false;
    }
}
@@ -4285,7 +4319,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
    else
    {
        OUString aTmpStr( cSymbol[0] );
        bMayBeFuncName = ScGlobal::getCharClassPtr()->isLetter( aTmpStr, 0 );
        bMayBeFuncName = pCharClass->isLetter( aTmpStr, 0 );
        bAsciiNonAlnum = false;
    }

@@ -4336,7 +4370,7 @@ bool ScCompiler::NextNewToken( bool bInArray )

        if (bAsciiNonAlnum)
        {
            bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
            bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
            if (cSymbol[0] == '#')
            {
                // Check for TableRef item specifiers first.
@@ -4362,7 +4396,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
        if (bMayBeFuncName)
        {
            if (aUpper.isEmpty())
                bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
                bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
            if (IsOpCode( aUpper, bInArray ))
                return true;
        }
@@ -4386,7 +4420,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
        }

        if (aUpper.isEmpty())
            bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
            bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);

        // IsBoolean() before IsValue() to catch inline bools without the kludge
        //    for inline arrays.
@@ -4397,8 +4431,14 @@ bool ScCompiler::NextNewToken( bool bInArray )
            return true;

        // User defined names and such do need i18n upper also in ODF.
        if (bAsciiUpper)
        if (bAsciiUpper || mbCharClassesDiffer)
        {
            // Use current system locale here because user defined symbols are
            // more likely in that localized language than in the formula
            // language. This in corner cases needs to continue to work for
            // existing documents and environments.
            aUpper = ScGlobal::getCharClassPtr()->uppercase( aOrg );
        }

        if (IsNamedRange( aUpper ))
            return true;
@@ -4456,7 +4496,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
    // Provide single token information and continue. Do not set an error, that
    // would prematurely end compilation. Simple unknown names are handled by
    // the interpreter.
    aUpper = ScGlobal::getCharClassPtr()->lowercase( aUpper );
    aUpper = pCharClass->lowercase( aUpper );
    svl::SharedString aSS = pDoc->GetSharedStringPool().intern(aUpper);
    maRawToken.SetString(aSS.getData(), aSS.getDataIgnoreCase());
    maRawToken.NewOpCode( ocBad );