tdf#132149 ww8export: respect ginormous paragraphs

This handles the extremely unlikely case where
a single paragraph contains multiple soft-page-breaks
from spanning more than two pages.

But it makes the unit tests I designed look much
better, so I am happy. I think it might help to
make the code slightly more understandable too,
and it convinces me that I am understanding
this section as I write multiple fixes against it.
[Better evidence than all of this is that Michael Stahl
came to the same conclusion from a code read - I just
beat him to it.]

(P.S. It isn't enough to change CurrentPageDesc, because
that is reset from rNode.FindPageDesc on every WriteText.
So the effective pageDesc needs to be kept track of.)

Change-Id: I5852e90571a74f3df4362caf058f7960f413dad3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/101545
Tested-by: Jenkins
Reviewed-by: Justin Luth <justin_luth@sil.org>
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
diff --git a/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
new file mode 100644
index 0000000..afa7961
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
index 65286ea..a9618eb 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
@@ -13,6 +13,7 @@
#include <tools/color.hxx>
#include <com/sun/star/beans/XPropertySet.hpp>
#include <com/sun/star/beans/NamedValue.hpp>
#include <com/sun/star/style/BreakType.hpp>
#include <com/sun/star/text/RelOrientation.hpp>
#include <com/sun/star/text/XTextViewCursorSupplier.hpp>
#include <com/sun/star/text/XPageCursor.hpp>
@@ -535,11 +536,28 @@ DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak, "tdf132149_pgBreak.odt")
    assertXPath(pDump, "//page[2]/infos/bounds", "width", "5953");  //portrait
    // This two-line 3rd page ought not to exist. DID YOU FIX ME? The real page 3 should be "8391" landscape.
    assertXPath(pDump, "//page[3]/infos/bounds", "width", "5953");
    // This really ought to be on odd page 3, but now it is on odd page 5.
    assertXPath(pDump, "//page[5]/infos/bounds", "width", "8391");
    assertXPath(pDump, "//page[5]/infos/prtBounds", "right", "6122");  //Left page style


    //Page break is not lost. This SHOULD be on page 4, but sadly it is not.
    assertXPathContent(pDump, "//page[5]/header/txt", "First Page Style");
    CPPUNIT_ASSERT(getXPathContent(pDump, "//page[5]/body/txt").startsWith("Lorem ipsum"));
    //Page style change here must not be lost. This SHOULD be on page 4, but sadly it is not.
    assertXPathContent(pDump, "//page[6]/header/txt", "First Page Style");
    CPPUNIT_ASSERT(getXPath(pDump, "//page[6]/body/txt[1]/Text[1]", "Portion").startsWith("Lorem ipsum"));
}

DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak2, "tdf132149_pgBreak2.odt")
{
    // This 3 page document is designed to visually exaggerate the problems
    // of emulating LO's followed-by-page-style into MSWord's sections.

    // The only specified page style change should be between page 1 and 2.
    // When the first paragraph was split into 3, each paragraph specified a page break. The last was unnecessary.
    uno::Reference<beans::XPropertySet> xParaThree(getParagraph(3), uno::UNO_QUERY_THROW);
    CPPUNIT_ASSERT_EQUAL(uno::Any(), xParaThree->getPropertyValue("PageDescName"));
    // The ODT is only 2 paragraphs, but a hack to get the right page style breaks para1 into pieces.
    // This was 4 paragraphs - the unnecessary page break had hacked in another paragraph split.
    CPPUNIT_ASSERT_LESSEQUAL( 3, getParagraphs() );
}

DECLARE_OOXMLEXPORT_TEST(testTdf135949_anchoredBeforeBreak, "tdf135949_anchoredBeforeBreak.docx")
diff --git a/sw/source/filter/ww8/wrtw8nds.cxx b/sw/source/filter/ww8/wrtw8nds.cxx
index 0c04d09..96d12e8 100644
--- a/sw/source/filter/ww8/wrtw8nds.cxx
+++ b/sw/source/filter/ww8/wrtw8nds.cxx
@@ -2265,6 +2265,7 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
    // Let's decide if we need to split the paragraph because of a section break
    bool bNeedParaSplit = NeedTextNodeSplit( rNode, softBreakList )
                        && !IsInTable();
    const SwPageDesc* pNextSplitParaPageDesc = m_pCurrentPageDesc;

    auto aBreakIt = softBreakList.begin();
    // iterate through portions on different pages
@@ -2273,7 +2274,13 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
        sal_Int32 nCurrentPos = *aBreakIt;

        if( softBreakList.size() > 1 ) // not for empty paragraph
            ++aBreakIt;
        {
            // no need to split again if the page style won't change anymore
            if ( pNextSplitParaPageDesc == pNextSplitParaPageDesc->GetFollow() )
                aBreakIt = --softBreakList.end();
            else
                ++aBreakIt;
        }

        AttrOutput().StartParagraph( pTextNodeInfo );

@@ -2718,9 +2725,9 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
        // if paragraph is split, put the section break between the parts
        if( bNeedParaSplit && *aBreakIt != rNode.GetText().getLength() )
        {
                const SwPageDesc* pNextPageDesc = m_pCurrentPageDesc->GetFollow();
                assert(pNextPageDesc);
                PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextPageDesc);
            pNextSplitParaPageDesc = pNextSplitParaPageDesc->GetFollow();
            assert(pNextSplitParaPageDesc);
            PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextSplitParaPageDesc);
        }
        else
        {