Resolves: tdf#137091 Use CharClass matching the formula language
... not the current locale. This is especially important for
uppercase/lowercase conversions, which may yield different results,
for example for the Turkish i with/without dot.
Change-Id: I2aa57cdcf530d7a0697c4ffbd5dccb86bb526d8e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/103588
Tested-by: Jenkins
Reviewed-by: Eike Rathke <erack@redhat.com>
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index e6a224f..e969ecb 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -29,6 +29,9 @@
#include <svl/zforlist.hxx>
#include <unotools/charclass.hxx>
#include <vcl/svapp.hxx>
#include <vcl/settings.hxx>
#include <comphelper/processfactory.hxx>
#include <com/sun/star/sheet/FormulaOpCodeMapEntry.hpp>
#include <com/sun/star/sheet/FormulaMapGroup.hpp>
#include <com/sun/star/sheet/FormulaMapGroupSpecialOffset.hpp>
@@ -140,6 +143,14 @@ void lclPushOpCodeMapEntries( ::std::vector< sheet::FormulaOpCodeMapEntry >& rVe
lclPushOpCodeMapEntry( rVec, pTable, *pnOpCodes );
}
/** Creates a CharClass for the UI language, unless that language is English.

    @return a CharClass allocated with new for the UI language tag (the
            caller takes ownership), or nullptr if the language of the UI
            language tag is "en".
 */
CharClass* createCharClassIfNonEnglishUI()
{
    const LanguageTag& rUILanguageTag = Application::GetSettings().GetUILanguageTag();
    const bool bEnglishUI = (rUILanguageTag.getLanguage() == "en");
    // No dedicated CharClass is handed out for an English UI.
    return bEnglishUI
        ? nullptr
        : new CharClass( ::comphelper::getProcessComponentContext(), rUILanguageTag);
}
class OpCodeList
{
public:
@@ -163,8 +174,8 @@ OpCodeList::OpCodeList(bool bLocalized, const std::pair<const char*, int>* pSymb
, mpSymbols(pSymbols)
, mbLocalized(bLocalized)
{
SvtSysLocale aSysLocale;
const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : aSysLocale.GetCharClassPtr());
std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : createCharClassIfNonEnglishUI());
const CharClass* pCharClass = xCharClass.get();
if (meSepType == FormulaCompiler::SeparatorType::RESOURCE_BASE)
{
for (sal_uInt16 i = 0; i <= SC_OPCODE_LAST_OPCODE_ID; ++i)
@@ -809,8 +820,8 @@ FormulaCompiler::OpCodeMapPtr FormulaCompiler::CreateOpCodeMap(
NonConstOpCodeMapPtr xMap = std::make_shared<OpCodeMap>( SC_OPCODE_LAST_OPCODE_ID + 1, false,
FormulaGrammar::mergeToGrammar( FormulaGrammar::setEnglishBit(
FormulaGrammar::GRAM_EXTERNAL, bEnglish), FormulaGrammar::CONV_UNSPECIFIED));
SvtSysLocale aSysLocale;
const CharClass* pCharClass = (xMap->isEnglish() ? nullptr : aSysLocale.GetCharClassPtr());
std::unique_ptr<CharClass> xCharClass( xMap->isEnglish() ? nullptr : createCharClassIfNonEnglishUI());
const CharClass* pCharClass = xCharClass.get();
for (auto const& rMapEntry : rMapping)
{
OpCode eOp = OpCode(rMapEntry.Token.OpCode);
diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index b09bce0..c4550a2 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -254,7 +254,8 @@ public:
private:
static CharClass *pCharClassEnglish; // character classification for en_US locale
static const CharClass *pCharClassEnglish; // character classification for en_US locale
static const CharClass *pCharClassLocalized; // character classification for UI locale
static const Convention *pConventions[ formula::FormulaGrammar::CONV_LAST ];
static const struct AddInMap
@@ -285,7 +286,7 @@ private:
std::queue<OpCode> maPendingOpCodes; // additional opcodes generated from a single symbol
const CharClass* pCharClass; // which character classification is used for parseAnyToken
const CharClass* pCharClass; // which character classification is used for parseAnyToken and upper/lower
sal_uInt16 mnPredetectedReference; // reference when reading ODF, 0 (none), 1 (single) or 2 (double)
sal_Int32 mnRangeOpPosInSymbol; // if and where a range operator is in symbol
const Convention *pConv;
@@ -322,6 +323,7 @@ private:
#endif
bool NextNewToken(bool bInArray);
bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const;
virtual void SetError(FormulaError nError) override;
sal_Int32 NextSymbol(bool bInArray);
@@ -352,7 +354,8 @@ private:
*/
ScRangeData* GetRangeData( const formula::FormulaToken& pToken ) const;
static void InitCharClassEnglish();
static const CharClass* GetCharClassEnglish();
static const CharClass* GetCharClassLocalized();
public:
ScCompiler( sc::CompileFormulaContext& rCxt, const ScAddress& rPos,
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index 9ab2b61..2050a21 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -22,6 +22,7 @@
#include <compiler.hxx>
#include <vcl/svapp.hxx>
#include <vcl/settings.hxx>
#include <sfx2/app.hxx>
#include <sfx2/objsh.hxx>
#include <basic/sbmeth.hxx>
@@ -79,7 +80,8 @@ using namespace formula;
using namespace ::com::sun::star;
using ::std::vector;
CharClass* ScCompiler::pCharClassEnglish = nullptr;
const CharClass* ScCompiler::pCharClassEnglish = nullptr;
const CharClass* ScCompiler::pCharClassLocalized = nullptr;
const ScCompiler::Convention* ScCompiler::pConventions[ ] = { nullptr, nullptr, nullptr, nullptr, nullptr, nullptr };
namespace {
@@ -173,12 +175,17 @@ void ScCompiler::DeInit()
delete pCharClassEnglish;
pCharClassEnglish = nullptr;
}
if (pCharClassLocalized)
{
delete pCharClassLocalized;
pCharClassLocalized = nullptr;
}
}
bool ScCompiler::IsEnglishSymbol( const OUString& rName )
{
// function names are always case-insensitive
OUString aUpper = ScGlobal::getCharClassPtr()->uppercase(rName);
OUString aUpper = GetCharClassEnglish()->uppercase(rName);
// 1. built-in function name
formula::FormulaCompiler aCompiler;
@@ -198,11 +205,27 @@ bool ScCompiler::IsEnglishSymbol( const OUString& rName )
return !aIntName.isEmpty(); // no valid function name
}
void ScCompiler::InitCharClassEnglish()
const CharClass* ScCompiler::GetCharClassEnglish()
{
css::lang::Locale aLocale( "en", "US", "");
pCharClassEnglish = new CharClass(
::comphelper::getProcessComponentContext(), LanguageTag( aLocale));
if (!pCharClassEnglish)
{
css::lang::Locale aLocale( "en", "US", "");
pCharClassEnglish = new CharClass(
::comphelper::getProcessComponentContext(), LanguageTag( aLocale));
}
return pCharClassEnglish;
}
/** Returns the character classification for the UI language.

    The instance is created on the first call and kept in the static
    pCharClassLocalized, so later calls return the same pointer.
 */
const CharClass* ScCompiler::GetCharClassLocalized()
{
    if (pCharClassLocalized)
        return pCharClassLocalized;

    // The UI language tag is read only once here. Switching the UI language
    // requires a restart; if it did not, we would have to keep track of a
    // change and recreate the instance.
    const auto& rUILanguageTag = Application::GetSettings().GetUILanguageTag();
    pCharClassLocalized = new CharClass(
            ::comphelper::getProcessComponentContext(), rUILanguageTag);
    return pCharClassLocalized;
}
void ScCompiler::SetGrammar( const FormulaGrammar::Grammar eGrammar )
@@ -268,13 +291,9 @@ void ScCompiler::SetFormulaLanguage( const ScCompiler::OpCodeMapPtr & xMap )
mxSymbols = xMap;
if (mxSymbols->isEnglish())
{
if (!pCharClassEnglish)
InitCharClassEnglish();
pCharClass = pCharClassEnglish;
}
pCharClass = GetCharClassEnglish();
else
pCharClass = ScGlobal::getCharClassPtr();
pCharClass = GetCharClassLocalized();
SetGrammarAndRefConvention( mxSymbols->getGrammar(), GetGrammar());
}
@@ -4174,9 +4193,9 @@ void ScCompiler::AutoCorrectParsedSymbol()
}
}
static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, FormulaGrammar::Grammar eGrammar )
bool ScCompiler::ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const
{
if (FormulaGrammar::isODFF( eGrammar ))
if (FormulaGrammar::isODFF( meGrammar ))
{
// ODFF has a defined set of English function names, avoid i18n
// overhead.
@@ -4185,7 +4204,8 @@ static bool lcl_UpperAsciiOrI18n( OUString& rUpper, const OUString& rOrg, Formul
}
else
{
rUpper = ScGlobal::getCharClassPtr()->uppercase(rOrg);
// One of localized or English.
rUpper = pCharClass->uppercase(rOrg);
return false;
}
}
@@ -4279,7 +4299,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
else
{
OUString aTmpStr( cSymbol[0] );
bMayBeFuncName = ScGlobal::getCharClassPtr()->isLetter( aTmpStr, 0 );
bMayBeFuncName = pCharClass->isLetter( aTmpStr, 0 );
bAsciiNonAlnum = false;
}
@@ -4330,7 +4350,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
if (bAsciiNonAlnum)
{
bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
if (cSymbol[0] == '#')
{
// Check for TableRef item specifiers first.
@@ -4356,7 +4376,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
if (bMayBeFuncName)
{
if (aUpper.isEmpty())
bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
if (IsOpCode( aUpper, bInArray ))
return true;
}
@@ -4380,7 +4400,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
}
if (aUpper.isEmpty())
bAsciiUpper = lcl_UpperAsciiOrI18n( aUpper, aOrg, meGrammar);
bAsciiUpper = ToUpperAsciiOrI18nIsAscii( aUpper, aOrg);
// IsBoolean() before IsValue() to catch inline bools without the kludge
// for inline arrays.
@@ -4392,7 +4412,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
// User defined names and such do need i18n upper also in ODF.
if (bAsciiUpper)
aUpper = ScGlobal::getCharClassPtr()->uppercase( aOrg );
aUpper = pCharClass->uppercase( aOrg );
if (IsNamedRange( aUpper ))
return true;
@@ -4450,7 +4470,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
// Provide single token information and continue. Do not set an error, that
// would prematurely end compilation. Simple unknown names are handled by
// the interpreter.
aUpper = ScGlobal::getCharClassPtr()->lowercase( aUpper );
aUpper = pCharClass->lowercase( aUpper );
svl::SharedString aSS = rDoc.GetSharedStringPool().intern(aUpper);
maRawToken.SetString(aSS.getData(), aSS.getDataIgnoreCase());
maRawToken.NewOpCode( ocBad );