tdf#152980 CSV import: Fix control character length in XLSX save

Converting from CSV to XLSX corrupts text that looks like an escaped
control character. Only escapes with exactly four hexadecimal digits
are allowed, i.e. the _x000D_ format, not _x0D_ for example.

Replace the lcl_unEscapeUnicodeChars function with decodeXString. Delete the now-unused functions and add multiple occurrences to the unit test.

Change-Id: Id1d4bfcf7d27cf5005e7bea8e289303c5d9aca73
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151494
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
diff --git a/sc/qa/unit/data/csv/tdf152980.csv b/sc/qa/unit/data/csv/tdf152980.csv
new file mode 100644
index 0000000..c5050b8
--- /dev/null
+++ b/sc/qa/unit/data/csv/tdf152980.csv
Binary files differ
diff --git a/sc/qa/unit/subsequent_export_test4.cxx b/sc/qa/unit/subsequent_export_test4.cxx
index 79b5441..233195b 100644
--- a/sc/qa/unit/subsequent_export_test4.cxx
+++ b/sc/qa/unit/subsequent_export_test4.cxx
@@ -1638,6 +1638,33 @@ CPPUNIT_TEST_FIXTURE(ScExportTest4, testTdf119565)
                         xShapeProps->getPropertyValue("LineJoint").get<drawing::LineJoint>());
}

// tdf#152980: round-trip a CSV document through XLSX and verify that text
// resembling an "_x...._" escape sequence survives the export/import cycle.
CPPUNIT_TEST_FIXTURE(ScExportTest4, testTdf152980)
{
    createScDoc("csv/tdf152980.csv");
    getScDocShell()->DoHardRecalc();

    saveAndReload("Calc Office Open XML");
    // The shell is fetched again after the reload before recalculating.
    getScDocShell()->DoHardRecalc();

    ScDocument* pDocument = getScDoc();

    // Reads the text of the cell in column 0, sheet 0 at the given row.
    const auto aCellText = [pDocument](SCROW nRow) { return pDocument->GetString(0, nRow, 0); };

    // Fewer than four hex digits between "_x" and "_":
    // - Expected: The part between a and b does not change
    // - Actual  : Only the characters a and b remain
    CPPUNIT_ASSERT_EQUAL(OUString("a_x1_b"), aCellText(0));
    CPPUNIT_ASSERT_EQUAL(OUString("a_x01_b"), aCellText(1));
    CPPUNIT_ASSERT_EQUAL(OUString("a_x001_b"), aCellText(2));

    // Four hex digits: the text is expected to come back unchanged as well
    CPPUNIT_ASSERT_EQUAL(OUString("a_x0001_b"), aCellText(3));

    // Real line breaks and tabs must be preserved, including repeated ones
    CPPUNIT_ASSERT_EQUAL(OUString("a_xfoo\nb"), aCellText(4));
    CPPUNIT_ASSERT_EQUAL(OUString("a\tb"), aCellText(5));
    CPPUNIT_ASSERT_EQUAL(OUString("a\nb"), aCellText(6));
    CPPUNIT_ASSERT_EQUAL(OUString("a\n\nb"), aCellText(7));
}

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sc/source/filter/oox/richstring.cxx b/sc/source/filter/oox/richstring.cxx
index a9b272d..8d2f964 100644
--- a/sc/source/filter/oox/richstring.cxx
+++ b/sc/source/filter/oox/richstring.cxx
@@ -48,116 +48,6 @@ bool lclNeedsRichTextFormat( const oox::xls::Font* pFont )
    return pFont && pFont->needsRichTextFormat();
}

/** Maps a single hexadecimal digit character to its numeric value.

    @param nCode  the character to interpret ('0'-'9', 'A'-'F' or 'a'-'f').
    @return  the value 0..15, or -1 if nCode is not a hexadecimal digit.
 */
sal_Int32 lcl_getHexLetterValue(sal_Unicode nCode)
{
    sal_Int32 nValue = -1;

    if ('0' <= nCode && nCode <= '9')
        nValue = nCode - '0';
    else if ('A' <= nCode && nCode <= 'F')
        nValue = nCode - 'A' + 10;
    else if ('a' <= nCode && nCode <= 'f')
        nValue = nCode - 'a' + 10;

    return nValue;
}

/** Tells whether a decoded code unit is one that may appear in escaped form.

    See the OOX open spec 2.1.1742 Part 1 Section 22.9.2.19, ST_Xstring
    (Escaped String).

    @param nCode  the code unit obtained from the hex digits of an escape.
    @return  true for CR, LF, TAB and underscore, and for every code unit
             outside the ranges 0x0020-0xD7FF and 0xE000-0xFFFD; false for
             all remaining code units inside those ranges.
 */
bool lcl_validEscape(sal_Unicode nCode)
{
    switch (nCode)
    {
        // Valid XML chars that can be escaped (ignoring the restrictions).
        case 0x0009:
        case 0x000A:
        case 0x000D:
        case 0x005F:
            return true;
        default:
            break;
    }

    // Other valid XML chars in the basic multilingual plane cannot be escaped.
    const bool bOrdinaryXmlChar
        = (0x0020 <= nCode && nCode <= 0xD7FF) || (0xE000 <= nCode && nCode <= 0xFFFD);

    return !bOrdinaryXmlChar;
}

/** Replaces escaped characters of the form _xHHHH_ in rSrc by the code unit
    they denote.

    An escape is recognised only when "_x" is followed by exactly four
    hexadecimal digits and a closing '_' (for example _x000D_). Shorter forms
    such as _x1_, _x01_ or _x001_ are ordinary text and are left untouched;
    accepting them would corrupt text that merely looks like an escape.
    A well-formed escape is replaced only if lcl_validEscape() accepts the
    decoded code unit; otherwise its characters are kept as they are.

    @param rSrc  the possibly escaped string.
    @return  the unescaped string, or rSrc itself if nothing was replaced.
 */
OUString lcl_unEscapeUnicodeChars(const OUString& rSrc)
{
    // Example: Escaped representation of unicode char 0x000D is _x000D_

    const sal_Int32 nLen = rSrc.getLength();
    if (!nLen)
        return rSrc;

    const OUString aPrefix = "_x";
    sal_Int32 nPrefixStart = rSrc.indexOf(aPrefix);
    if (nPrefixStart == -1)
        return rSrc;

    const sal_Int32 nHexDigits = 4;
    // "_x" + four hex digits + closing "_"
    const sal_Int32 nEscapeLen = 2 + nHexDigits + 1;

    OUStringBuffer aBuf(rSrc);
    sal_Int32 nOffset = 0; // index offset in aBuf w.r.t rSrc.
    bool bFound = false;

    do
    {
        // Unless a complete escape is found, resume just after this "_x".
        sal_Int32 nEnd = nPrefixStart + 2;

        // The whole escape must fit into the string and be terminated by '_'
        // right after the fourth digit.
        if (nPrefixStart + nEscapeLen <= nLen && rSrc[nPrefixStart + nEscapeLen - 1] == '_')
        {
            sal_Unicode nCode = 0;
            bool bAllHex = true;
            for (sal_Int32 nIdx = 0; nIdx < nHexDigits; ++nIdx)
            {
                const sal_Int32 nLetter = lcl_getHexLetterValue(rSrc[nPrefixStart + 2 + nIdx]);
                if (nLetter < 0)
                {
                    bAllHex = false;
                    break;
                }
                nCode = (nCode << 4) + static_cast<sal_Unicode>(nLetter);
            }

            if (bAllHex)
            {
                // Well-formed escape: continue searching behind its closing '_'.
                nEnd = nPrefixStart + nEscapeLen;

                if (lcl_validEscape(nCode))
                {
                    bFound = true;
                    aBuf.remove(nPrefixStart - nOffset, nEscapeLen);
                    aBuf.insert(nPrefixStart - nOffset, nCode);

                    // The escape shrank to a single code unit in aBuf.
                    nOffset += nEscapeLen - 1;
                }
            }
        }

        nPrefixStart = rSrc.indexOf(aPrefix, nEnd);
    }
    while (nPrefixStart != -1);

    if (bFound)
        return aBuf.makeStringAndClear();

    return rSrc;
}

} // namespace

RichStringPortion::RichStringPortion() :
@@ -168,7 +58,7 @@ RichStringPortion::RichStringPortion() :

/** Stores the text of this portion.

    The incoming text is passed through AttributeConversion::decodeXString()
    (presumably resolving _xHHHH_ escape sequences - confirm against the oox
    helper) and the result is kept in maText.

    @param rText  the raw text as read from the file.
 */
void RichStringPortion::setText( const OUString& rText )
{
    maText = AttributeConversion::decodeXString(rText);
}

FontRef const & RichStringPortion::createFont(const WorkbookHelper& rHelper)