tdf#108995: take xml:space attribute into account

See section 2.10 of the XML 1.0 specification and section 17.3.3.31 of ECMA-376-1:2016

Change-Id: I7f19d3b9cf2ccce88a5fa03022beeb99facc04fe
Reviewed-on: https://gerrit.libreoffice.org/39682
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/sw/qa/extras/ooxmlimport/data/xml_space.docx b/sw/qa/extras/ooxmlimport/data/xml_space.docx
new file mode 100644
index 0000000..305c135
--- /dev/null
+++ b/sw/qa/extras/ooxmlimport/data/xml_space.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
index 2513ca8..817c065 100644
--- a/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
+++ b/sw/qa/extras/ooxmlimport/ooxmlimport.cxx
@@ -1372,6 +1372,16 @@ DECLARE_OOXMLIMPORT_TEST(testTdf108714, "tdf108714.docx")
    CPPUNIT_ASSERT_EQUAL(style::BreakType_PAGE_BEFORE, breakType);
}

DECLARE_OOXMLIMPORT_TEST(testTdf108995, "xml_space.docx")
{
    // tdf#108995: the xml:space attribute must be honoured on import.
    // In the expected text, the line marked xml:space="preserve" keeps its
    // leading tab, inner tabs and trailing space, while the line without the
    // attribute carries no tabs and no leading/trailing white space.
    const OUString sExpected("\tA\t\tline  with\txml:space=\"preserve\" \n"
                             "A  line  without xml:space");

    // Both lines are expected to live in one single paragraph.
    CPPUNIT_ASSERT_EQUAL(1, getParagraphs());
    CPPUNIT_ASSERT_EQUAL(sExpected, getParagraph(1)->getString());
}

// tests should only be added to ooxmlIMPORT *if* they fail round-tripping in ooxmlEXPORT

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
index 6ec8820..acb40f85 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.cxx
@@ -601,6 +601,12 @@ void OOXMLFastContextHandler::text(const OUString & sText)
        // tdf#108806: CRLFs in XML were converted to \n before this point.
        // These must be converted to spaces before further processing.
        OUString sNormalizedText = sText.replaceAll("\n", " ");
        // tdf#108995: by default, leading and trailing white space is ignored;
        // tabs are converted to spaces
        if (!IsPreserveSpace())
        {
            sNormalizedText = sNormalizedText.trim().replaceAll("\t", " ");
        }
        mpStream->utext(reinterpret_cast < const sal_uInt8 * >
                        (sNormalizedText.getStr()),
                        sNormalizedText.getLength());
@@ -863,6 +869,15 @@ void OOXMLFastContextHandler::sendPropertiesToParent()
    }
}

bool OOXMLFastContextHandler::IsPreserveSpace() const
{
    // This base implementation holds no xml:space state of its own: the
    // attribute applies to everything inside the element that specifies it
    // (unless another xml:space overrides it), so the question is simply
    // forwarded to the parent context.
    if (!mpParent)
        return false; // default value: no ancestor left to ask

    return mpParent->IsPreserveSpace();
}

/*
  class OOXMLFastContextHandlerStream
 */
@@ -870,7 +885,9 @@ void OOXMLFastContextHandler::sendPropertiesToParent()
// Constructs a stream context handler: forwards pContext to the base-class
// constructor and allocates a new OOXMLPropertySet for mpPropertySetAttrs.
OOXMLFastContextHandlerStream::OOXMLFastContextHandlerStream
(OOXMLFastContextHandler * pContext)
: OOXMLFastContextHandler(pContext),
  mpPropertySetAttrs(new OOXMLPropertySet)
  mpPropertySetAttrs(new OOXMLPropertySet),
  // Both xml:space flags start out false: no value has been recorded yet, so
  // IsPreserveSpace() falls back to the base-class implementation until
  // newProperty() sets them.
  mbPreserveSpace(false),
  mbPreserveSpaceSet(false)
{
}

@@ -881,7 +898,14 @@ OOXMLFastContextHandlerStream::~OOXMLFastContextHandlerStream()
void OOXMLFastContextHandlerStream::newProperty(Id nId,
                                                const OOXMLValue::Pointer_t& pVal)
{
    if (nId != 0x0)
    if (nId == NS_ooxml::LN_CT_Text_space)
    {
        // Set the xml:space attribute value early, to allow
        // child contexts to use it when dealing with strings
        mbPreserveSpace = pVal->getString() == "preserve";
        mbPreserveSpaceSet = true;
    }
    else if (nId != 0x0)
    {
        OOXMLProperty::Pointer_t pProperty(new OOXMLProperty(nId, pVal, OOXMLProperty::ATTRIBUTE));

@@ -913,6 +937,15 @@ void OOXMLFastContextHandlerStream::handleHyperlink()
    aHyperlinkHandler.writetext();
}

bool OOXMLFastContextHandlerStream::IsPreserveSpace() const
{
    // An xml:space value recorded on this element wins over anything
    // inherited; when none was recorded, defer to the base-class lookup.
    return mbPreserveSpaceSet ? mbPreserveSpace
                              : OOXMLFastContextHandler::IsPreserveSpace();
}

/*
  class OOXMLFastContextHandlerProperties
 */
diff --git a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
index aabbc7e..b33f474 100644
--- a/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
+++ b/writerfilter/source/ooxml/OOXMLFastContextHandler.hxx
@@ -221,6 +221,9 @@ protected:
    void startAction();
    void endAction();

    // Whether xml:space="preserve" is in effect for this context
    // (see section 2.10 of the XML 1.0 specification)
    virtual bool IsPreserveSpace() const;

    const css::uno::Reference< css::uno::XComponentContext >& getComponentContext() { return m_xContext;}

    bool inPositionV;
@@ -253,8 +256,13 @@ public:

    void handleHyperlink();

protected:
    virtual bool IsPreserveSpace() const override;

private:
    mutable OOXMLPropertySet::Pointer_t mpPropertySetAttrs;
    bool mbPreserveSpace    : 1;
    bool mbPreserveSpaceSet : 1;
};

class OOXMLFastContextHandlerProperties : public OOXMLFastContextHandler