tdf#152980 CSV import: Fix control character length in XLSX save
Converting from CSV to XLSX corrupts text that looks like an escaped control
character. Only escape sequences with exactly four hexadecimal digits are
allowed, i.e. the _x000D_ format, not _x0D_ for example.
Replace the lcl_unEscapeUnicodeChars function with decodeXString. Delete the now-unused functions and add multiple occurrences to the unit test.
Change-Id: Id1d4bfcf7d27cf5005e7bea8e289303c5d9aca73
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151494
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Eike Rathke <erack@redhat.com>
Signed-off-by: Xisco Fauli <xiscofauli@libreoffice.org>
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151562
Reviewed-by: Michael Stahl <michael.stahl@allotropia.de>
Tested-by: Jenkins
diff --git a/sc/qa/unit/data/csv/tdf152980.csv b/sc/qa/unit/data/csv/tdf152980.csv
new file mode 100644
index 0000000..c5050b8
--- /dev/null
+++ b/sc/qa/unit/data/csv/tdf152980.csv
Binary files differ
diff --git a/sc/qa/unit/subsequent_export_test2.cxx b/sc/qa/unit/subsequent_export_test2.cxx
index 56d7ac1..d1920de 100644
--- a/sc/qa/unit/subsequent_export_test2.cxx
+++ b/sc/qa/unit/subsequent_export_test2.cxx
@@ -193,6 +193,7 @@ public:
void testTotalsRowFunction();
void testAutofilterHiddenButton();
void testTdf119565();
void testTdf152980();
CPPUNIT_TEST_SUITE(ScExportTest2);
@@ -325,6 +326,7 @@ public:
CPPUNIT_TEST(testTotalsRowFunction);
CPPUNIT_TEST(testAutofilterHiddenButton);
CPPUNIT_TEST(testTdf119565);
CPPUNIT_TEST(testTdf152980);
CPPUNIT_TEST_SUITE_END();
};
@@ -2969,6 +2971,33 @@ void ScExportTest2::testTdf119565()
xShapeProps->getPropertyValue("LineJoint").get<drawing::LineJoint>());
}
void ScExportTest2::testTdf152980()
{
createScDoc("csv/tdf152980.csv");
ScDocShell* pDocSh = getScDocShell();
pDocSh->DoHardRecalc();
saveAndReload("Calc Office Open XML");
pDocSh = getScDocShell();
pDocSh->DoHardRecalc();
ScDocument* pDoc = getScDoc();
// - Expected: The part between a and b does not change
// - Actual : Only the characters a and b remain
CPPUNIT_ASSERT_EQUAL(OUString("a_x1_b"), pDoc->GetString(0, 0, 0));
CPPUNIT_ASSERT_EQUAL(OUString("a_x01_b"), pDoc->GetString(0, 1, 0));
CPPUNIT_ASSERT_EQUAL(OUString("a_x001_b"), pDoc->GetString(0, 2, 0));
// The character code does not change in both cases
CPPUNIT_ASSERT_EQUAL(OUString("a_x0001_b"), pDoc->GetString(0, 3, 0));
// The escape characters are handled correctly in both cases
CPPUNIT_ASSERT_EQUAL(OUString("a_xfoo\nb"), pDoc->GetString(0, 4, 0));
CPPUNIT_ASSERT_EQUAL(OUString("a\tb"), pDoc->GetString(0, 5, 0));
CPPUNIT_ASSERT_EQUAL(OUString("a\nb"), pDoc->GetString(0, 6, 0));
CPPUNIT_ASSERT_EQUAL(OUString("a\n\nb"), pDoc->GetString(0, 7, 0));
}
CPPUNIT_TEST_SUITE_REGISTRATION(ScExportTest2);
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/oox/richstring.cxx b/sc/source/filter/oox/richstring.cxx
index a9b272d..8d2f964 100644
--- a/sc/source/filter/oox/richstring.cxx
+++ b/sc/source/filter/oox/richstring.cxx
@@ -48,116 +48,6 @@ bool lclNeedsRichTextFormat( const oox::xls::Font* pFont )
return pFont && pFont->needsRichTextFormat();
}
/** Returns the numeric value (0-15) of a hexadecimal digit character.

    Accepts '0'-'9', 'A'-'F' and 'a'-'f'; any other character yields -1.
 */
sal_Int32 lcl_getHexLetterValue(sal_Unicode nCode)
{
    sal_Int32 nValue = -1;

    if ('0' <= nCode && nCode <= '9')
        nValue = nCode - '0';
    else if ('A' <= nCode && nCode <= 'F')
        nValue = nCode - 'A' + 10;
    else if ('a' <= nCode && nCode <= 'f')
        nValue = nCode - 'a' + 10;

    return nValue;
}
/** Tells whether a decoded code unit is one that may appear in escaped form.

    Returns true for CR, LF, TAB and '_' and for every code unit outside the
    ranges 0x0020-0xD7FF and 0xE000-0xFFFD; returns false otherwise.
 */
bool lcl_validEscape(sal_Unicode nCode)
{
    // Valid XML chars that can be escaped (ignoring the restrictions) as in the OOX open spec
    // 2.1.1742 Part 1 Section 22.9.2.19, ST_Xstring (Escaped String)
    switch (nCode)
    {
        case 0x0009:
        case 0x000A:
        case 0x000D:
        case 0x005F:
            return true;
        default:
            break;
    }

    // Other valid XML chars in the basic multilingual plane cannot be escaped.
    const bool bPlainXmlChar
        = (0x0020 <= nCode && nCode <= 0xD7FF) || (0xE000 <= nCode && nCode <= 0xFFFD);
    return !bPlainXmlChar;
}
/** Replaces escape sequences of the form _xHHHH_ in rSrc by the code unit
    they denote.

    An escape is recognised only when "_x" is followed by exactly four
    hexadecimal digits and a closing '_' (e.g. _x000D_). Shorter forms such as
    _x1_, _x01_ or _x001_ are ordinary text and are left untouched; decoding
    them would corrupt cell content that merely looks like an escape.

    A well-formed sequence is replaced only if lcl_validEscape() accepts the
    decoded code unit; otherwise it is kept verbatim and scanning resumes
    after it.

    @param rSrc  the possibly escaped string
    @return rSrc itself if nothing was replaced, else the decoded string
 */
OUString lcl_unEscapeUnicodeChars(const OUString& rSrc)
{
    // Example: Escaped representation of unicode char 0x000D is _x000D_
    const sal_Int32 nLen = rSrc.getLength();
    if (!nLen)
        return rSrc;

    const OUString aPrefix = "_x";
    sal_Int32 nPrefixStart = rSrc.indexOf(aPrefix, 0);
    if (nPrefixStart == -1)
        return rSrc;

    const sal_Int32 nHexDigits = 4;
    // "_x" + four hex digits + closing "_"
    const sal_Int32 nEscStrLen = 2 + nHexDigits + 1;

    OUStringBuffer aBuf(rSrc);
    sal_Int32 nOffset = 0; // index offset in aBuf w.r.t rSrc.
    bool bFound = false;
    do
    {
        // By default continue the search just after this "_x".
        sal_Int32 nNext = nPrefixStart + 2;

        // Position of the closing '_' of a full-length escape.
        const sal_Int32 nLast = nPrefixStart + nEscStrLen - 1;
        if (nLast < nLen && rSrc[nLast] == '_')
        {
            sal_Unicode nCode = 0;
            bool bAllHex = true;
            for (sal_Int32 nIdx = 0; nIdx < nHexDigits; ++nIdx)
            {
                const sal_Int32 nLetter = lcl_getHexLetterValue(rSrc[nPrefixStart + 2 + nIdx]);
                if (nLetter < 0)
                {
                    bAllHex = false;
                    break;
                }
                nCode = (nCode << 4) + static_cast<sal_Unicode>(nLetter);
            }

            if (bAllHex)
            {
                // Well-formed sequence: skip it entirely, decoded or not.
                nNext = nLast + 1;
                if (lcl_validEscape(nCode))
                {
                    bFound = true;
                    aBuf.remove(nPrefixStart - nOffset, nEscStrLen);
                    aBuf.insert(nPrefixStart - nOffset, nCode);
                    // The buffer shrank by the escape length minus the one
                    // code unit that replaced it.
                    nOffset += nEscStrLen - 1;
                }
            }
        }

        nPrefixStart = rSrc.indexOf(aPrefix, nNext);
    }
    while (nPrefixStart != -1);

    if (bFound)
        return aBuf.makeStringAndClear();

    return rSrc;
}
} // namespace
RichStringPortion::RichStringPortion() :
@@ -168,7 +58,7 @@ RichStringPortion::RichStringPortion() :
/** Stores the text of this portion in maText after passing it through
    AttributeConversion::decodeXString().

    @param rText  the portion text as read from the file
 */
void RichStringPortion::setText( const OUString& rText )
{
    // A single decoding pass is enough: an earlier assignment from
    // lcl_unEscapeUnicodeChars() was overwritten immediately and had no effect.
    maText = AttributeConversion::decodeXString(rText);
}
FontRef const & RichStringPortion::createFont(const WorkbookHelper& rHelper)