Resolves: tdf#76310 Preserve whitespace TAB, CR, LF in formula expressions

Allowed whitespace characters in ODFF and OOXML are:
U+0020 SPACE
U+0009 CHARACTER TABULATION
U+000A LINE FEED
U+000D CARRIAGE RETURN

Line feed and carriage return look a bit odd in the Function Wizard when
they are part of a function's argument, but they work. Once a formula is
edited, however, CR is converted to LF, probably already in EditEngine
(not investigated).

Change-Id: I6278f6be48872e0710a3d74212db391dda249ed2
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/119635
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
diff --git a/formula/source/core/api/FormulaCompiler.cxx b/formula/source/core/api/FormulaCompiler.cxx
index be5ce09..f717480 100644
--- a/formula/source/core/api/FormulaCompiler.cxx
+++ b/formula/source/core/api/FormulaCompiler.cxx
@@ -475,7 +475,8 @@ uno::Sequence< sheet::FormulaOpCodeMapEntry > FormulaCompiler::OpCodeMap::create
            { FormulaMapGroupSpecialOffset::DB_AREA           , ocDBArea }         ,
            /* TODO: { FormulaMapGroupSpecialOffset::TABLE_REF         , ocTableRef }       , */
            { FormulaMapGroupSpecialOffset::MACRO             , ocMacro }          ,
            { FormulaMapGroupSpecialOffset::COL_ROW_NAME      , ocColRowName }
            { FormulaMapGroupSpecialOffset::COL_ROW_NAME      , ocColRowName }     ,
            { FormulaMapGroupSpecialOffset::WHITESPACE        , ocWhitespace }
        };
        const size_t nCount = SAL_N_ELEMENTS(aMap);
        // Preallocate vector elements.
@@ -1267,14 +1268,18 @@ bool FormulaCompiler::GetToken()
             nWasColRowName = 1;
        else
             nWasColRowName = 0;
        OpCode eTmpOp;
        mpToken = maArrIterator.Next();
        while( mpToken && mpToken->GetOpCode() == ocSpaces )
        while (mpToken && ((eTmpOp = mpToken->GetOpCode()) == ocSpaces || eTmpOp == ocWhitespace))
        {
            // For significant whitespace remember last ocSpaces token. Usually
            // there's only one even for multiple spaces.
            pSpacesToken = mpToken;
            if ( nWasColRowName )
                nWasColRowName++;
            if (eTmpOp == ocSpaces)
            {
                // For significant whitespace remember last ocSpaces token.
                // Usually there's only one even for multiple spaces.
                pSpacesToken = mpToken;
                if ( nWasColRowName )
                    nWasColRowName++;
            }
            if ( bAutoCorrect && !pStack )
                CreateStringFromToken( aCorrectedFormula, mpToken.get() );
            mpToken = maArrIterator.Next();
@@ -2272,10 +2277,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
    if( bSpaces )
        rBuffer.append( ' ');

    if( eOp == ocSpaces )
    if (eOp == ocSpaces || eOp == ocWhitespace)
    {
        bool bWriteSpaces = true;
        if (mxSymbols->isODFF())
        if (eOp == ocSpaces && mxSymbols->isODFF())
        {
            const FormulaToken* p = maArrIterator.PeekPrevNoSpaces();
            bool bIntersectionOp = (p && p->GetOpCode() == ocColRowName);
@@ -2316,7 +2321,10 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
            sal_uInt8 n = t->GetByte();
            for ( sal_uInt8 j=0; j<n; ++j )
            {
                rBuffer.append( ' ');
                if (eOp == ocWhitespace)
                    rBuffer.append( t->GetChar());
                else
                    rBuffer.append( ' ');
            }
        }
    }
@@ -2403,6 +2411,7 @@ const FormulaToken* FormulaCompiler::CreateStringFromToken( OUStringBuffer& rBuf
                                case ocPush:
                                case ocRange:
                                case ocSpaces:
                                case ocWhitespace:
                                    break;
                                default:
                                    nLevel = 0;
diff --git a/formula/source/core/api/token.cxx b/formula/source/core/api/token.cxx
index 0af1f63..c5b69ac 100644
--- a/formula/source/core/api/token.cxx
+++ b/formula/source/core/api/token.cxx
@@ -244,6 +244,13 @@ void FormulaToken::SetSheet( sal_Int16 )
    assert( !"virtual dummy called" );
}

/** Base-class fallback for tokens that do not carry a character.

    Calling this on a token type that does not override it is treated as a
    programming error and asserts. FormulaSpaceToken supplies the real
    implementation. If assertions are compiled out, 0 is returned.
 */
sal_Unicode FormulaToken::GetChar() const
{
    // This Get is worth an assert.
    assert( !"virtual dummy called" );
    return 0;
}

short* FormulaToken::GetJump() const
{
    SAL_WARN( "formula.core", "FormulaToken::GetJump: virtual dummy called" );
@@ -348,6 +355,15 @@ bool FormulaToken::TextEqual( const FormulaToken& rToken ) const
// real implementations of virtual functions


/** @return the stored repetition count of the whitespace character. */
sal_uInt8 FormulaSpaceToken::GetByte() const
{
    return nByte;
}

/** @return the stored whitespace character. */
sal_Unicode FormulaSpaceToken::GetChar() const
{
    return cChar;
}

/** Two whitespace tokens are equal if the base-class comparison succeeds and
    both the repetition count and the character match.

    The checks are performed in this order and stop at the first mismatch, so
    r.GetByte() / r.GetChar() are only queried once the base comparison has
    succeeded.
 */
bool FormulaSpaceToken::operator==( const FormulaToken& r ) const
{
    if (!FormulaToken::operator==( r ))
        return false;
    if (nByte != r.GetByte())
        return false;
    return cChar == r.GetChar();
}


sal_uInt8   FormulaByteToken::GetByte() const           { return nByte; }
void        FormulaByteToken::SetByte( sal_uInt8 n )    { nByte = n; }
ParamClass  FormulaByteToken::GetInForceArray() const    { return eInForceArray; }
@@ -425,6 +441,13 @@ bool FormulaTokenArray::AddFormulaToken(
                    AddStringXML( aStrVal );
                else if ( eOpCode == ocExternal || eOpCode == ocMacro )
                    Add( new formula::FormulaExternalToken( eOpCode, aStrVal ) );
                else if ( eOpCode == ocWhitespace )
                {
                    // Simply ignore empty string.
                    // Convention is one character repeated.
                    if (!aStrVal.isEmpty())
                        Add( new formula::FormulaSpaceToken( static_cast<sal_uInt8>(aStrVal.getLength()), aStrVal[0]));
                }
                else
                    bError = true;      // unexpected string: don't know what to do with it
            }
@@ -1472,17 +1495,21 @@ FormulaTokenArray * FormulaTokenArray::RewriteMissing( const MissingConvention &
    return pNewArr;
}

namespace {
/** @return true if eOp is one of the two whitespace op-codes, ocSpaces or
    ocWhitespace; false for every other op-code. */
inline bool isWhitespace( OpCode eOp )
{
    switch (eOp)
    {
        case ocSpaces:
        case ocWhitespace:
            return true;
        default:
            return false;
    }
}
}

bool FormulaTokenArray::MayReferenceFollow()
{
    if ( pCode && nLen > 0 )
    {
        // ignore trailing spaces
        sal_uInt16 i = nLen - 1;
        while ( i > 0 && pCode[i]->GetOpCode() == SC_OPCODE_SPACES )
        while (i > 0 && isWhitespace( pCode[i]->GetOpCode()))
        {
            --i;
        }
        if ( i > 0 || pCode[i]->GetOpCode() != SC_OPCODE_SPACES )
        if (i > 0 || !isWhitespace( pCode[i]->GetOpCode()))
        {
            OpCode eOp = pCode[i]->GetOpCode();
            if ( (SC_OPCODE_START_BIN_OP <= eOp && eOp < SC_OPCODE_STOP_BIN_OP ) ||
@@ -1756,7 +1783,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::NextNoSpaces()
{
    if( mpFTA->GetArray() )
    {
        while( (mnIndex < mpFTA->GetLen()) && (mpFTA->GetArray()[ mnIndex ]->GetOpCode() == ocSpaces) )
        while ((mnIndex < mpFTA->GetLen()) && isWhitespace( mpFTA->GetArray()[ mnIndex ]->GetOpCode()))
            ++mnIndex;
        if( mnIndex < mpFTA->GetLen() )
            return mpFTA->GetArray()[ mnIndex++ ];
@@ -1793,7 +1820,7 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekNextNoSpaces() const
    if( mpFTA->GetArray() && mnIndex < mpFTA->GetLen() )
    {
        sal_uInt16 j = mnIndex;
        while ( j < mpFTA->GetLen() && mpFTA->GetArray()[j]->GetOpCode() == ocSpaces )
        while (j < mpFTA->GetLen() && isWhitespace( mpFTA->GetArray()[j]->GetOpCode()))
            j++;
        if ( j < mpFTA->GetLen() )
            return mpFTA->GetArray()[ j ];
@@ -1809,9 +1836,9 @@ FormulaToken* FormulaTokenArrayPlainIterator::PeekPrevNoSpaces() const
    if( mpFTA->GetArray() && mnIndex > 1 )
    {
        sal_uInt16 j = mnIndex - 2;
        while ( mpFTA->GetArray()[j]->GetOpCode() == ocSpaces && j > 0 )
        while (isWhitespace( mpFTA->GetArray()[j]->GetOpCode()) && j > 0 )
            j--;
        if ( j > 0 || mpFTA->GetArray()[j]->GetOpCode() != ocSpaces )
        if (j > 0 || !isWhitespace( mpFTA->GetArray()[j]->GetOpCode()))
            return mpFTA->GetArray()[ j ];
        else
            return nullptr;
diff --git a/formula/source/ui/dlg/formula.cxx b/formula/source/ui/dlg/formula.cxx
index 81931d8..36b59d5 100644
--- a/formula/source/ui/dlg/formula.cxx
+++ b/formula/source/ui/dlg/formula.cxx
@@ -389,6 +389,9 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
        sal_Int32 nOldTokPos = 1;
        sal_Int32 nPrevFuncPos = 1;
        short nBracketCount = 0;
        const sal_Int32 nOpPush = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode;
        const sal_Int32 nOpSpaces = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode;
        const sal_Int32 nOpWhitespace = m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::WHITESPACE].Token.OpCode;
        while ( pIter != pEnd )
        {
            const sal_Int32 eOp = pIter->OpCode;
@@ -401,8 +404,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
                m_xBtnMatrix->set_active(true);
            }

            if (eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::PUSH].Token.OpCode ||
                eOp == m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode)
            if (eOp == nOpPush || eOp == nOpSpaces || eOp == nOpWhitespace)
            {
                const sal_Int32 n1 = nTokPos < 0 ? -1 : aFormString.indexOf( sep, nTokPos);
                const sal_Int32 n2 = nTokPos < 0 ? -1 : aFormString.indexOf( ')', nTokPos);
@@ -444,7 +446,7 @@ sal_Int32 FormulaDlg_Impl::GetFunctionPos(sal_Int32 nPos)
                    m_pFunctionOpCodesEnd,
                    [&eOp](const sheet::FormulaOpCodeMapEntry& aEntry) { return aEntry.Token.OpCode == eOp; });

            if ( bIsFunction && m_aSpecialOpCodes[sheet::FormulaMapGroupSpecialOffset::SPACES].Token.OpCode != eOp )
            if ( bIsFunction && nOpSpaces != eOp && nOpWhitespace != eOp )
            {
                nPrevFuncPos = nFuncPos;
                nFuncPos = nOldTokPos;
diff --git a/include/formula/compiler.hxx b/include/formula/compiler.hxx
index baf3e23..fcf7326 100644
--- a/include/formula/compiler.hxx
+++ b/include/formula/compiler.hxx
@@ -40,26 +40,27 @@
#define SC_OPCODE_BAD                14
#define SC_OPCODE_STRINGXML          15
#define SC_OPCODE_SPACES             16
#define SC_OPCODE_MAT_REF            17
#define SC_OPCODE_DB_AREA            18     /* additional access operators */
#define SC_OPCODE_TABLE_REF          19
#define SC_OPCODE_MACRO              20
#define SC_OPCODE_COL_ROW_NAME       21
#define SC_OPCODE_COL_ROW_NAME_AUTO  22
#define SC_OPCODE_PERCENT_SIGN       23     /* operator _follows_ value */
#define SC_OPCODE_ARRAY_OPEN         24
#define SC_OPCODE_ARRAY_CLOSE        25
#define SC_OPCODE_ARRAY_ROW_SEP      26
#define SC_OPCODE_ARRAY_COL_SEP      27     /* some convs use sep != col_sep */
#define SC_OPCODE_TABLE_REF_OPEN     28
#define SC_OPCODE_TABLE_REF_CLOSE    29
#define SC_OPCODE_TABLE_REF_ITEM_ALL      30
#define SC_OPCODE_TABLE_REF_ITEM_HEADERS  31
#define SC_OPCODE_TABLE_REF_ITEM_DATA     32
#define SC_OPCODE_TABLE_REF_ITEM_TOTALS   33
#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 34
#define SC_OPCODE_STOP_DIV           35
#define SC_OPCODE_SKIP               36     /* used to skip raw tokens during string compilation */
#define SC_OPCODE_WHITESPACE         17
#define SC_OPCODE_MAT_REF            18
#define SC_OPCODE_DB_AREA            19     /* additional access operators */
#define SC_OPCODE_TABLE_REF          20
#define SC_OPCODE_MACRO              21
#define SC_OPCODE_COL_ROW_NAME       22
#define SC_OPCODE_COL_ROW_NAME_AUTO  23
#define SC_OPCODE_PERCENT_SIGN       24     /* operator _follows_ value */
#define SC_OPCODE_ARRAY_OPEN         25
#define SC_OPCODE_ARRAY_CLOSE        26
#define SC_OPCODE_ARRAY_ROW_SEP      27
#define SC_OPCODE_ARRAY_COL_SEP      28     /* some convs use sep != col_sep */
#define SC_OPCODE_TABLE_REF_OPEN     29
#define SC_OPCODE_TABLE_REF_CLOSE    30
#define SC_OPCODE_TABLE_REF_ITEM_ALL      31
#define SC_OPCODE_TABLE_REF_ITEM_HEADERS  32
#define SC_OPCODE_TABLE_REF_ITEM_DATA     33
#define SC_OPCODE_TABLE_REF_ITEM_TOTALS   34
#define SC_OPCODE_TABLE_REF_ITEM_THIS_ROW 35
#define SC_OPCODE_STOP_DIV           36
#define SC_OPCODE_SKIP               37     /* used to skip raw tokens during string compilation */

/*** error constants #... ***/
#define SC_OPCODE_START_ERRORS       40
diff --git a/include/formula/opcode.hxx b/include/formula/opcode.hxx
index 3123e8f..d92ae0b 100644
--- a/include/formula/opcode.hxx
+++ b/include/formula/opcode.hxx
@@ -53,6 +53,7 @@ enum OpCode : sal_uInt16
        ocBad               = SC_OPCODE_BAD,
        ocStringXML         = SC_OPCODE_STRINGXML,
        ocSpaces            = SC_OPCODE_SPACES,
        ocWhitespace        = SC_OPCODE_WHITESPACE,
        ocMatRef            = SC_OPCODE_MAT_REF,
        ocTableRefItemAll     = SC_OPCODE_TABLE_REF_ITEM_ALL,
        ocTableRefItemHeaders = SC_OPCODE_TABLE_REF_ITEM_HEADERS,
@@ -545,6 +546,7 @@ inline std::string OpCodeEnumToString(OpCode eCode)
    case ocBad: return "Bad";
    case ocStringXML: return "StringXML";
    case ocSpaces: return "Spaces";
    case ocWhitespace: return "Whitespace";
    case ocMatRef: return "MatRef";
    case ocTableRefItemAll: return "TableRefItemAll";
    case ocTableRefItemHeaders: return "TableRefItemHeaders";
diff --git a/include/formula/token.hxx b/include/formula/token.hxx
index 3fa00e8..77bf3ee 100644
--- a/include/formula/token.hxx
+++ b/include/formula/token.hxx
@@ -187,6 +187,7 @@ public:
    virtual void                SetIndex( sal_uInt16 n );
    virtual sal_Int16           GetSheet() const;
    virtual void                SetSheet( sal_Int16 n );
    virtual sal_Unicode         GetChar() const;
    virtual short*              GetJump() const;
    virtual const OUString&     GetExternal() const;
    virtual FormulaToken*       GetFAPOrigToken() const;
@@ -225,6 +226,25 @@ inline void intrusive_ptr_release(const FormulaToken* p)
    p->DecRef();
}

/** Token describing a run of one repeated whitespace character.

    Always constructed with StackVar svByte and OpCode ocWhitespace. The
    character is available through GetChar() and the number of repetitions
    through GetByte().
 */
class FORMULA_DLLPUBLIC FormulaSpaceToken : public FormulaToken
{
private:
            sal_uInt8           nByte;  ///< repetition count, returned by GetByte()
            sal_Unicode         cChar;  ///< the whitespace character, returned by GetChar()
public:
                                /// @param n  repetition count
                                /// @param c  the whitespace character
                                FormulaSpaceToken( sal_uInt8 n, sal_Unicode c ) :
                                    FormulaToken( svByte, ocWhitespace ),
                                    nByte( n ), cChar( c ) {}
                                /// Copies the base token state plus count and character.
                                FormulaSpaceToken( const FormulaSpaceToken& r ) :
                                    FormulaToken( r ),
                                    nByte( r.nByte ), cChar( r.cChar ) {}

    /// Returns a newly allocated copy of this token.
    virtual FormulaToken*       Clone() const override { return new FormulaSpaceToken(*this); }
    virtual sal_uInt8           GetByte() const override;
    virtual sal_Unicode         GetChar() const override;
    /// Equal if the base comparison, the count and the character all match.
    virtual bool                operator==( const FormulaToken& rToken ) const override;
};

class FORMULA_DLLPUBLIC FormulaByteToken : public FormulaToken
{
private:
diff --git a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
index 89c21dc..4cb2699 100644
--- a/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
+++ b/offapi/com/sun/star/sheet/FormulaMapGroupSpecialOffset.idl
@@ -140,10 +140,6 @@ constants FormulaMapGroupSpecialOffset
        <p>The FormulaToken::Data member shall contain a
        positive integer value of type `long` specifying the number
        of space characters.</p>

        <p>Attention: This may change in next versions to support other
        characters than simple space characters (e.g. line feeds, horizontal
        tabulators, non-breakable spaces).</p>
     */
    const long SPACES = 8;

@@ -176,6 +172,26 @@ constants FormulaMapGroupSpecialOffset
    const long COL_ROW_NAME       = 12;


    /** Formula tokens containing the op-code obtained from this offset
        describe whitespace characters within the string representation of a
        formula.

        <p>Whitespace characters in formulas are used for readability and do
        not affect the result of the formula.</p>

        <p>The FormulaToken::Data member shall contain a
        `string` of one (repeated) whitespace character. The length of
        the string determines the number of repetitions.</p>

        <p>Allowed whitespace characters are SPACE (U+0020), CHARACTER
        TABULATION (U+0009), LINE FEED (U+000A), and CARRIAGE RETURN
        (U+000D). See also ODF v1.3 OpenFormula 5.14 Whitespace.</p>

        @since LibreOffice 7.3
     */
    const long WHITESPACE = 13;


};


diff --git a/sc/inc/compiler.hxx b/sc/inc/compiler.hxx
index d8935c7..17e258d 100644
--- a/sc/inc/compiler.hxx
+++ b/sc/inc/compiler.hxx
@@ -108,6 +108,10 @@ public:
    union {
        double       nValue;
        struct {
            sal_uInt8           nCount;
            sal_Unicode         cChar;
        } whitespace;
        struct {
            sal_uInt8           cByte;
            formula::ParamClass eInForceArray;
        } sbyte;
@@ -326,7 +330,21 @@ private:
    bool ToUpperAsciiOrI18nIsAscii( OUString& rUpper, const OUString& rOrg ) const;

    virtual void SetError(FormulaError nError) override;
    sal_Int32 NextSymbol(bool bInArray);

    /** One run of a repeated whitespace character collected while scanning a
        formula string: the character and how often it occurred in sequence.
        A default-constructed run is an empty run of U+0020 SPACE. */
    struct Whitespace final
    {
        sal_Int32   nCount;     ///< number of consecutive occurrences
        sal_Unicode cChar;      ///< the repeated character

        Whitespace()
            : nCount(0)
            , cChar(0x20)
        {
        }

        /** Start a new, still empty run of character c. */
        void reset( sal_Unicode c )
        {
            cChar = c;
            nCount = 0;
        }
    };

    static void addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces,
            ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n = 1 );

    std::vector<Whitespace> NextSymbol(bool bInArray);

    bool IsValue( const OUString& );
    bool IsOpCode( const OUString&, bool bInArray );
    bool IsOpCode2( const OUString& );
diff --git a/sc/source/core/tool/compiler.cxx b/sc/source/core/tool/compiler.cxx
index 0d1dc9d..83eb2f4 100644
--- a/sc/source/core/tool/compiler.cxx
+++ b/sc/source/core/tool/compiler.cxx
@@ -336,11 +336,8 @@ ScCompiler::Convention::Convention( FormulaGrammar::AddressConvention eConv )
    for (i = 0; i < 128; i++)
        t[i] = ScCharFlags::Illegal;

// tdf#56036: Allow tabs/newlines in imported formulas (for now simply treat them as (and convert to) space)
// TODO: tdf#76310: allow saving newlines as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace")
// This is compliant with the OASIS decision (see https://issues.oasis-open.org/browse/OFFICE-701)
// Also, this would enable correct roundtrip from/to OOXML without losing tabs/newlines
// This requires saving actual space characters in ocSpaces token, using them in UI and saving
// Allow tabs/newlines.
// Allow saving whitespace as is (as per OpenFormula specification v.1.2, clause 5.14 "Whitespace").
/* tab */   t[ 9] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
/* lf  */   t[10] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
/* cr  */   t[13] = ScCharFlags::CharDontCare | ScCharFlags::WordSep | ScCharFlags::ValueSep;
@@ -2067,6 +2064,19 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz
    return true;
}

// static
/** Account for n occurrences of whitespace character c.

    If c continues the run currently held in rSpace, only its count grows.
    Otherwise the current run is appended to rvSpaces first (provided it is
    non-empty and its character is not 0), and rSpace is restarted for c.

    @param rvSpaces  receives completed runs
    @param rSpace    the run currently being accumulated
    @param c         the whitespace character encountered
    @param n         how many occurrences to add
 */
void ScCompiler::addWhitespace( std::vector<ScCompiler::Whitespace> & rvSpaces,
        ScCompiler::Whitespace & rSpace, sal_Unicode c, sal_Int32 n )
{
    if (rSpace.cChar == c)
    {
        // Same character as before, just extend the current run.
        rSpace.nCount += n;
        return;
    }

    // Character changed: flush a pending run, then begin a new one.
    const bool bPendingRun = (rSpace.nCount > 0) && (rSpace.cChar != 0);
    if (bPendingRun)
        rvSpaces.push_back(rSpace);
    rSpace.reset(c);
    rSpace.nCount += n;
}

// NextSymbol

// Parses the formula into separate symbols for further processing.
@@ -2104,8 +2114,9 @@ static bool lcl_isUnicodeIgnoreAscii( const sal_Unicode* p1, const char* p2, siz
//               | other             | Symbol=Symbol+char    | GetString
//---------------+-------------------+-----------------------+---------------

sal_Int32 ScCompiler::NextSymbol(bool bInArray)
std::vector<ScCompiler::Whitespace> ScCompiler::NextSymbol(bool bInArray)
{
    std::vector<Whitespace> vSpaces;
    cSymbol[MAXSTRLEN] = 0;       // end
    sal_Unicode* pSym = cSymbol;
    const sal_Unicode* const pStart = aFormula.getStr();
@@ -2116,7 +2127,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray)
    bool bQuote = false;
    mnRangeOpPosInSymbol = -1;
    ScanState eState = ssGetChar;
    sal_Int32 nSpaces = 0;
    Whitespace aSpace;
    sal_Unicode cSep = mxSymbols->getSymbolChar( ocSep);
    sal_Unicode cArrayColSep = mxSymbols->getSymbolChar( ocArrayColSep);
    sal_Unicode cArrayRowSep = mxSymbols->getSymbolChar( ocArrayRowSep);
@@ -2129,6 +2140,7 @@ sal_Int32 ScCompiler::NextSymbol(bool bInArray)

    int nDecSeps = 0;
    bool bAutoIntersection = false;
    size_t nAutoIntersectionSpacesPos = 0;
    int nRefInName = 0;
    bool bErrorConstantHadSlash = false;
    mnPredetectedReference = 0;
@@ -2187,7 +2199,12 @@ Label_MaskStateMachine:
                        if (!bAutoIntersection)
                        {
                            ++pSrc;
                            nSpaces += 2;   // must match the character count
                            // Add 2 because it must match the character count
                            // for bi18n.
                            addWhitespace( vSpaces, aSpace, 0x20, 2);
                            // Position of Whitespace where it will be added to
                            // vector.
                            nAutoIntersectionSpacesPos = vSpaces.size();
                            bAutoIntersection = true;
                        }
                        else
@@ -2267,7 +2284,7 @@ Label_MaskStateMachine:
                }
                else if( nMask & ScCharFlags::CharDontCare )
                {
                    nSpaces++;
                    addWhitespace( vSpaces, aSpace, c);
                }
                else if( nMask & ScCharFlags::CharIdent )
                {   // try to get a simple ASCII identifier before calling
@@ -2731,10 +2748,15 @@ Label_MaskStateMachine:
        cLast = c;
        c = *pSrc;
    }

    if (aSpace.nCount && aSpace.cChar)
        vSpaces.emplace_back(aSpace);

    if ( bi18n )
    {
        const sal_Int32 nOldSrcPos = nSrcPos;
        nSrcPos = nSrcPos + nSpaces;
        for (const auto& r : vSpaces)
            nSrcPos += r.nCount;
        // If group separator is not a possible operator and not one of any
        // separators then it may be parsed away in numbers. This is
        // specifically the case with NO-BREAK SPACE, which actually triggers
@@ -2835,9 +2857,9 @@ Label_MaskStateMachine:
    }
    if ( bAutoCorrect )
        aCorrectedSymbol = OUString(cSymbol, pSym - cSymbol);
    if (bAutoIntersection && nSpaces > 1)
        --nSpaces;  // replace '!!' with only one space
    return nSpaces;
    if (bAutoIntersection && vSpaces[nAutoIntersectionSpacesPos].nCount > 1)
        --vSpaces[nAutoIntersectionSpacesPos].nCount;   // replace '!!' with only one space
    return vSpaces;
}

// Convert symbol to token
@@ -4246,7 +4268,7 @@ bool ScCompiler::NextNewToken( bool bInArray )
    }

    bool bAllowBooleans = bInArray;
    sal_Int32 nSpaces = NextSymbol(bInArray);
    const std::vector<Whitespace> & vSpaces = NextSymbol(bInArray);

    if (!cSymbol[0])
    {
@@ -4266,15 +4288,31 @@ bool ScCompiler::NextNewToken( bool bInArray )
        return false;
    }

    if( nSpaces )
    if (!vSpaces.empty())
    {
        ScRawToken aToken;
        aToken.SetOpCode( ocSpaces );
        aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(nSpaces, 255) );
        if( !static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ) )
        for (const auto& rSpace : vSpaces)
        {
            SetError(FormulaError::CodeOverflow);
            return false;
            if (rSpace.cChar == 0x20)
            {
                // For now keep this a FormulaByteToken for the nasty
                // significant whitespace intersection. This probably can be
                // changed to a FormulaSpaceToken but then other places may
                // need to be adapted.
                aToken.SetOpCode( ocSpaces );
                aToken.sbyte.cByte = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) );
            }
            else
            {
                aToken.SetOpCode( ocWhitespace );
                aToken.whitespace.nCount = static_cast<sal_uInt8>( std::min<sal_Int32>(rSpace.nCount, 255) );
                aToken.whitespace.cChar = rSpace.cChar;
            }
            if (!static_cast<ScTokenArray*>(pArr)->AddRawToken( aToken ))
            {
                SetError(FormulaError::CodeOverflow);
                return false;
            }
        }
    }

diff --git a/sc/source/core/tool/parclass.cxx b/sc/source/core/tool/parclass.cxx
index 8dd3901..6c560b07 100644
--- a/sc/source/core/tool/parclass.cxx
+++ b/sc/source/core/tool/parclass.cxx
@@ -74,6 +74,7 @@ const ScParameterClassification::RawData ScParameterClassification::pRawData[] =
    { ocSep,             {{ Bounds                                               }, 0, Bounds }},
    { ocSkip,            {{ Bounds                                               }, 0, Bounds }},
    { ocSpaces,          {{ Bounds                                               }, 0, Bounds }},
    { ocWhitespace,      {{ Bounds                                               }, 0, Bounds }},
    { ocStop,            {{ Bounds                                               }, 0, Bounds }},
    { ocStringXML,       {{ Bounds                                               }, 0, Bounds }},
    { ocTableRef,        {{ Bounds                                               }, 0, Value }},    // or Reference?
diff --git a/sc/source/core/tool/token.cxx b/sc/source/core/tool/token.cxx
index 04355b8..f17cd9a 100644
--- a/sc/source/core/tool/token.cxx
+++ b/sc/source/core/tool/token.cxx
@@ -235,6 +235,11 @@ void ScRawToken::SetOpCode( OpCode e )
        case ocTableRefClose:
            eType = svSep;
            break;
        case ocWhitespace:
            eType = svByte;
            whitespace.nCount = 1;
            whitespace.cChar = 0x20;
            break;
        default:
            eType = svByte;
            sbyte.cByte = 0;
@@ -349,7 +354,10 @@ FormulaToken* ScRawToken::CreateToken(ScSheetLimits& rLimits) const
    switch ( GetType() )
    {
        case svByte :
            return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray );
            if (eOp == ocWhitespace)
                return new FormulaSpaceToken( whitespace.nCount, whitespace.cChar );
            else
                return new FormulaByteToken( eOp, sbyte.cByte, sbyte.eInForceArray );
        case svDouble :
            IF_NOT_OPCODE_ERROR( ocPush, FormulaDoubleToken);
            return new FormulaDoubleToken( nValue );
@@ -1652,6 +1660,7 @@ void ScTokenArray::CheckToken( const FormulaToken& r )
            case ocMissing:
            case ocBad:
            case ocSpaces:
            case ocWhitespace:
            case ocSkip:
            case ocPercentSign:
            case ocErrNull:
@@ -2089,6 +2098,7 @@ FormulaToken* ScTokenArray::MergeArray( )
            break;

            case ocSpaces :
            case ocWhitespace :
                // ignore spaces
                --nPrevRowSep;      // shorten this row by 1
            break;
@@ -5136,12 +5146,18 @@ OUString ScTokenArray::CreateString( sc::TokenStringContext& rCxt, const ScAddre
    {
        const FormulaToken* pToken = *p;
        OpCode eOp = pToken->GetOpCode();
        /* FIXME: why does this ignore the count of spaces? */
        if (eOp == ocSpaces)
        {
            // TODO : Handle intersection operator '!!'.
            aBuf.append(' ');
            continue;
        }
        else if (eOp == ocWhitespace)
        {
            aBuf.append( pToken->GetChar());
            continue;
        }

        if (eOp < rCxt.mxOpCodeMap->getSymbolCount())
            aBuf.append(rCxt.mxOpCodeMap->getSymbol(eOp));
diff --git a/sc/source/filter/excel/xeformula.cxx b/sc/source/filter/excel/xeformula.cxx
index f2edeff..f829529 100644
--- a/sc/source/filter/excel/xeformula.cxx
+++ b/sc/source/filter/excel/xeformula.cxx
@@ -826,9 +826,13 @@ const FormulaToken* XclExpFmlaCompImpl::PeekNextRawToken() const
bool XclExpFmlaCompImpl::GetNextToken( XclExpScToken& rTokData )
{
    rTokData.mpScToken = GetNextRawToken();
    rTokData.mnSpaces = (rTokData.GetOpCode() == ocSpaces) ? rTokData.mpScToken->GetByte() : 0;
    while( rTokData.GetOpCode() == ocSpaces )
    rTokData.mnSpaces = 0;
    /* TODO: handle ocWhitespace characters? */
    while (rTokData.GetOpCode() == ocSpaces || rTokData.GetOpCode() == ocWhitespace)
    {
        rTokData.mnSpaces += rTokData.mpScToken->GetByte();
        rTokData.mpScToken = GetNextRawToken();
    }
    return rTokData.Is();
}

diff --git a/sc/source/filter/excel/xlformula.cxx b/sc/source/filter/excel/xlformula.cxx
index 1f974f4..e2e082a 100644
--- a/sc/source/filter/excel/xlformula.cxx
+++ b/sc/source/filter/excel/xlformula.cxx
@@ -867,8 +867,11 @@ void XclTokenArrayIterator::NextRawToken()
void XclTokenArrayIterator::SkipSpaces()
{
    if( mbSkipSpaces )
        while( Is() && ((*this)->GetOpCode() == ocSpaces) )
    {
        OpCode eOp;
        while( Is() && (((eOp = (*this)->GetOpCode()) == ocSpaces) || eOp == ocWhitespace) )
            NextRawToken();
    }
}

// strings and string lists ---------------------------------------------------
diff --git a/sc/source/ui/app/inputhdl.cxx b/sc/source/ui/app/inputhdl.cxx
index f01f93d..b3e644f 100644
--- a/sc/source/ui/app/inputhdl.cxx
+++ b/sc/source/ui/app/inputhdl.cxx
@@ -654,7 +654,7 @@ void ScInputHandler::DeleteRangeFinder()

static OUString GetEditText(const EditEngine* pEng)
{
    return ScEditUtil::GetSpaceDelimitedString(*pEng);
    return ScEditUtil::GetMultilineString(*pEng);
}

static void lcl_RemoveTabs(OUString& rStr)
diff --git a/sc/source/ui/unoobj/tokenuno.cxx b/sc/source/ui/unoobj/tokenuno.cxx
index 33f005f..b07a04e 100644
--- a/sc/source/ui/unoobj/tokenuno.cxx
+++ b/sc/source/ui/unoobj/tokenuno.cxx
@@ -32,6 +32,7 @@

#include <svl/itemprop.hxx>
#include <vcl/svapp.hxx>
#include <comphelper/string.hxx>

#include <miscuno.hxx>
#include <convuno.hxx>
@@ -388,6 +389,18 @@ void ScTokenConversion::ConvertToTokenSequence( const ScDocument& rDoc,
                    // Only the count of spaces is stored as "long". Parameter count is ignored.
                    if ( eOpCode == ocSpaces )
                        rAPI.Data <<= static_cast<sal_Int32>(rToken.GetByte());
                    else if (eOpCode == ocWhitespace)
                    {
                        // Convention is one character repeated.
                        if (rToken.GetByte() == 1)
                            rAPI.Data <<= OUString( rToken.GetChar());
                        else
                        {
                            OUStringBuffer aBuf( rToken.GetByte());
                            comphelper::string::padToLength( aBuf, rToken.GetByte(), rToken.GetChar());
                            rAPI.Data <<= aBuf.makeStringAndClear();
                        }
                    }
                    else
                        rAPI.Data.clear();      // no data
                    break;
diff --git a/sc/source/ui/view/viewfunc.cxx b/sc/source/ui/view/viewfunc.cxx
index fbe8a0b..4e5e149 100644
--- a/sc/source/ui/view/viewfunc.cxx
+++ b/sc/source/ui/view/viewfunc.cxx
@@ -677,7 +677,7 @@ void ScViewFunc::EnterData( SCCOL nCol, SCROW nRow, SCTAB nTab,
        }

        // #i97726# always get text for "repeat" of undo action
        aString = ScEditUtil::GetSpaceDelimitedString(aEngine);
        aString = ScEditUtil::GetMultilineString(aEngine);

        //      undo