tdf#132149 ww8export: respect ginormous paragraphs
This handles the extremely unlikely case where
a single paragraph contains multiple soft-page-breaks
from spanning more than two pages.
But it makes the unit tests I designed look much
better, so I am happy. I think it might help to
make the code slightly more understandable too,
and it convinces me that I am understanding
this section as I write multiple fixes against it.
[Better evidence than all of this is that Michael Stahl
came to the same conclusion from a code read - I just
beat him to it.]
(P.S. It isn't enough to change CurrentPageDesc, because
that is reset from rNode.FindPageDesc on every WriteText.
So the effective pageDesc needs to be kept track of.)
Change-Id: I5852e90571a74f3df4362caf058f7960f413dad3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/101545
Tested-by: Jenkins
Reviewed-by: Justin Luth <justin_luth@sil.org>
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
diff --git a/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
new file mode 100644
index 0000000..afa7961
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf132149_pgBreak2.odt
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
index 65286ea..a9618eb 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport15.cxx
@@ -13,6 +13,7 @@
#include <tools/color.hxx>
#include <com/sun/star/beans/XPropertySet.hpp>
#include <com/sun/star/beans/NamedValue.hpp>
#include <com/sun/star/style/BreakType.hpp>
#include <com/sun/star/text/RelOrientation.hpp>
#include <com/sun/star/text/XTextViewCursorSupplier.hpp>
#include <com/sun/star/text/XPageCursor.hpp>
@@ -535,11 +536,28 @@ DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak, "tdf132149_pgBreak.odt")
assertXPath(pDump, "//page[2]/infos/bounds", "width", "5953"); //portrait
// This two-line 3rd page ought not to exist. DID YOU FIX ME? The real page 3 should be "8391" landscape.
assertXPath(pDump, "//page[3]/infos/bounds", "width", "5953");
// This really ought to be on odd page 3, but now it is on odd page 5.
assertXPath(pDump, "//page[5]/infos/bounds", "width", "8391");
assertXPath(pDump, "//page[5]/infos/prtBounds", "right", "6122"); //Left page style
//Page break is not lost. This SHOULD be on page 4, but sadly it is not.
assertXPathContent(pDump, "//page[5]/header/txt", "First Page Style");
CPPUNIT_ASSERT(getXPathContent(pDump, "//page[5]/body/txt").startsWith("Lorem ipsum"));
//Page style change here must not be lost. This SHOULD be on page 4, but sadly it is not.
assertXPathContent(pDump, "//page[6]/header/txt", "First Page Style");
CPPUNIT_ASSERT(getXPath(pDump, "//page[6]/body/txt[1]/Text[1]", "Portion").startsWith("Lorem ipsum"));
}
DECLARE_OOXMLEXPORT_TEST(testTdf132149_pgBreak2, "tdf132149_pgBreak2.odt")
{
// This 3-page document is designed to visually exaggerate the problems
// of emulating LO's followed-by page styles with MSWord's sections.
// The only specified page style change should be between pages 1 and 2.
// When the first paragraph was split into 3, each piece specified a page break. The last was unnecessary, so paragraph 3 must have an empty PageDescName.
uno::Reference<beans::XPropertySet> xParaThree(getParagraph(3), uno::UNO_QUERY_THROW);
CPPUNIT_ASSERT_EQUAL(uno::Any(), xParaThree->getPropertyValue("PageDescName"));
// The ODT has only 2 paragraphs, but a hack to get the right page style breaks para1 into pieces.
// This used to be 4 paragraphs - the unnecessary page break had hacked in another paragraph split. Expect at most 3 now.
CPPUNIT_ASSERT_LESSEQUAL( 3, getParagraphs() );
}
DECLARE_OOXMLEXPORT_TEST(testTdf135949_anchoredBeforeBreak, "tdf135949_anchoredBeforeBreak.docx")
diff --git a/sw/source/filter/ww8/wrtw8nds.cxx b/sw/source/filter/ww8/wrtw8nds.cxx
index 0c04d09..96d12e8 100644
--- a/sw/source/filter/ww8/wrtw8nds.cxx
+++ b/sw/source/filter/ww8/wrtw8nds.cxx
@@ -2265,6 +2265,7 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
// Let's decide if we need to split the paragraph because of a section break
bool bNeedParaSplit = NeedTextNodeSplit( rNode, softBreakList )
&& !IsInTable();
const SwPageDesc* pNextSplitParaPageDesc = m_pCurrentPageDesc;
auto aBreakIt = softBreakList.begin();
// iterate through portions on different pages
@@ -2273,7 +2274,13 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
sal_Int32 nCurrentPos = *aBreakIt;
if( softBreakList.size() > 1 ) // not for empty paragraph
++aBreakIt;
{
// no need to split again if the page style won't change anymore
if ( pNextSplitParaPageDesc == pNextSplitParaPageDesc->GetFollow() )
aBreakIt = --softBreakList.end();
else
++aBreakIt;
}
AttrOutput().StartParagraph( pTextNodeInfo );
@@ -2718,9 +2725,9 @@ void MSWordExportBase::OutputTextNode( SwTextNode& rNode )
// if paragraph is split, put the section break between the parts
if( bNeedParaSplit && *aBreakIt != rNode.GetText().getLength() )
{
const SwPageDesc* pNextPageDesc = m_pCurrentPageDesc->GetFollow();
assert(pNextPageDesc);
PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextPageDesc);
pNextSplitParaPageDesc = pNextSplitParaPageDesc->GetFollow();
assert(pNextSplitParaPageDesc);
PrepareNewPageDesc( rNode.GetpSwAttrSet(), rNode, nullptr , pNextSplitParaPageDesc);
}
else
{