tdf#155496: Export list header to (X)HTML using 'display: block' style

This re-implements commit 013a4f1f5c9ea5fb511568c53a7e76d1b365a65d
(sw XHTML export: fix handling of list labels, 2021-05-13): instead
of keeping headers (ODF 'text:list-header' elements) out of lists,
it adds a 'style="display: block"' attribute to the respective list
items. This makes sure that the items use proper list indentation,
and produces correct markup.

Change-Id: I900e4aebbe562830dc2ce5400e3e33b38c2f2faa
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/152280
Tested-by: Jenkins
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
diff --git a/sw/qa/extras/htmlexport/data/listItemSubheader.fodt b/sw/qa/extras/htmlexport/data/listItemSubheader.fodt
new file mode 100644
index 0000000..4c68ce8
--- /dev/null
+++ b/sw/qa/extras/htmlexport/data/listItemSubheader.fodt
@@ -0,0 +1,31 @@
<?xml version="1.0" encoding="UTF-8"?>

<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" office:version="1.3" office:mimetype="application/vnd.oasis.opendocument.text">
 <office:body>
  <office:text>
   <text:p/>
   <text:list>
    <text:list-item>
     <text:p>list 1 item 1</text:p>
     <text:list>
      <text:list-header>
       <text:p>list 1 item 1 sub-header</text:p>
      </text:list-header>
     </text:list>
    </text:list-item>
   </text:list>
   <text:p/>
   <text:p>text</text:p>
   <text:list text:continue-numbering="true">
    <text:list-item>
     <text:list>
      <text:list-header>
       <text:p>list 2 sub-header</text:p>
      </text:list-header>
     </text:list>
    </text:list-item>
   </text:list>
   <text:p/>
  </office:text>
 </office:body>
</office:document>
\ No newline at end of file
diff --git a/sw/qa/extras/htmlexport/htmlexport.cxx b/sw/qa/extras/htmlexport/htmlexport.cxx
index 710c4c0..105b19c 100644
--- a/sw/qa/extras/htmlexport/htmlexport.cxx
+++ b/sw/qa/extras/htmlexport/htmlexport.cxx
@@ -1552,10 +1552,13 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testListHeading)
    CPPUNIT_ASSERT(pXmlDoc);

    // Without the accompanying fix in place, this test would have failed:
    // - expected: <div><p>...</p></div>
    // - expected: <div><ol><li style="display: block"><p>...</p></li></ol></div>
    // - actual  : <div><ol><p>...</p></li></ol></div>
    // because a </li> without a matching <li> is not well-formed, and an <ol> with non-li children is invalid.
    assertXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p", "list header");
    OUString aContent
        = getXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ol/"
                                   "reqif-xhtml:li[@style='display: block']/reqif-xhtml:p");
    CPPUNIT_ASSERT_EQUAL(OUString("list header"), aContent.trim());
}

CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testPartiallyNumberedList)
@@ -2151,11 +2154,10 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testListsHeading)
    xmlDocUniquePtr pXmlDoc = parseXmlStream(&aStream);
    CPPUNIT_ASSERT(pXmlDoc);

    // Without the accompanying fix in place, this test would have failed with:
    // - In <>, XPath '/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p' not found
    // Because the headers of list 1 were inside <div><ol>, not directly under <div>.
    assertXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:p",
                       "list 1, header 1");
    OUString aContent
        = getXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ol/"
                                   "reqif-xhtml:li[@style='display: block']/reqif-xhtml:p");
    CPPUNIT_ASSERT_EQUAL(OUString("list 1, header 1"), aContent.trim());
}

CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testOleEmfPreviewToHtml)
@@ -2611,6 +2613,38 @@ CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testTdf155387)
        "l3");
}

CPPUNIT_TEST_FIXTURE(SwHtmlDomExportTest, testTdf155496)
{
    // Given a document whose list items contain sub-lists that consist of a list header only,
    // export it to reqif-xhtml and parse the wrapped result.
    createSwDoc("listItemSubheader.fodt");
    ExportToReqif();

    SvMemoryStream aExportStream;
    WrapReqifFromTempFile(aExportStream);
    xmlDocUniquePtr pXmlDoc = parseXmlStream(&aExportStream);
    // Without the fix in place, this would fail
    CPPUNIT_ASSERT(pXmlDoc);

    // The div holds two top-level lists
    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul", 2);

    // The first list has a single top-level item ...
    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]"
                         "/reqif-xhtml:li");
    // ... which has one top-level paragraph ...
    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]"
                         "/reqif-xhtml:li/reqif-xhtml:p");
    // ... and one sublist ...
    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]"
                         "/reqif-xhtml:li/reqif-xhtml:ul");
    // ... that in turn has one item
    assertXPath(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]"
                         "/reqif-xhtml:li/reqif-xhtml:ul/reqif-xhtml:li");

    // Finally, verify the text of the paragraph inside the sublist item
    const OUString aSubItemText
        = getXPathContent(pXmlDoc, "/reqif-xhtml:html/reqif-xhtml:div/reqif-xhtml:ul[1]"
                                   "/reqif-xhtml:li/reqif-xhtml:ul/reqif-xhtml:li/reqif-xhtml:p")
              .trim();
    CPPUNIT_ASSERT_EQUAL(OUString("list 1 item 1\n\t\tsub-header"), aSubItemText);
}

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/html/htmlatr.cxx b/sw/source/filter/html/htmlatr.cxx
index 37b460e..62460cc 100644
--- a/sw/source/filter/html/htmlatr.cxx
+++ b/sw/source/filter/html/htmlatr.cxx
@@ -688,12 +688,11 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,
    if( nNewDefListLvl != rWrt.m_nDefListLvl )
        rWrt.OutAndSetDefList( nNewDefListLvl );

    bool bAtLeastOneNumbered = false;
    // if necessary, start a bulleted or numbered list
    if( rInfo.bInNumberBulletList )
    {
        OSL_ENSURE( !rWrt.m_nDefListLvl, "DL cannot be inside OL!" );
        OutHTML_NumberBulletListStart( rWrt, aNumInfo, bAtLeastOneNumbered );
        OutHTML_NumberBulletListStart( rWrt, aNumInfo );

        if( bNumbered )
        {
@@ -763,18 +762,26 @@ static void OutHTML_SwFormat( SwHTMLWriter& rWrt, const SwFormat& rFormat,

    // if necessary, start a new list item
    bool bNumberedForListItem = bNumbered;
    if (!bNumberedForListItem && rWrt.mbXHTML && bAtLeastOneNumbered)
    if (!bNumberedForListItem)
    {
        // OutHTML_NumberBulletListEnd() will end a list item if at least one text node is numbered
        // in the list, so open the list item with the same condition here.
        bNumberedForListItem = true;
        // Open a list also for the leading unnumbered nodes (= list headers in ODF terminology);
        // to do that, detect if this unnumbered node is the first in this list
        const auto& rPrevListInfo = rWrt.GetNumInfo();
        if (rPrevListInfo.GetNumRule() != aNumInfo.GetNumRule() || aNumInfo.IsRestart(rPrevListInfo)
            || rPrevListInfo.GetDepth() < aNumInfo.GetDepth())
            bNumberedForListItem = true;
    }
    if( rInfo.bInNumberBulletList && bNumberedForListItem )
    {
        HtmlWriter html(rWrt.Strm(), rWrt.maNamespace);
        html.prettyPrint(rWrt.m_bPrettyPrint);
        html.start(OOO_STRING_SVTOOLS_HTML_li);
        if( USHRT_MAX != nNumStart )
        if (!bNumbered)
        {
            // Handles list headers (<text:list-header> ODF element)
            html.attribute(OOO_STRING_SVTOOLS_HTML_O_style, "display: block");
        }
        else if (USHRT_MAX != nNumStart)
            html.attribute(OOO_STRING_SVTOOLS_HTML_O_value, OString::number(nNumStart));
        // Finish the opening element, but don't close it.
        html.characters("");
diff --git a/sw/source/filter/html/htmlnumwriter.cxx b/sw/source/filter/html/htmlnumwriter.cxx
index 8ca8c08..98b5817 100644
--- a/sw/source/filter/html/htmlnumwriter.cxx
+++ b/sw/source/filter/html/htmlnumwriter.cxx
@@ -84,8 +84,7 @@ void SwHTMLWriter::SetNextNumInfo( std::unique_ptr<SwHTMLNumRuleInfo> pNxt )
}

SwHTMLWriter& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt,
                                 const SwHTMLNumRuleInfo& rInfo,
                                 bool& rAtLeastOneNumbered )
                                 const SwHTMLNumRuleInfo& rInfo )
{
    SwHTMLNumRuleInfo& rPrevInfo = rWrt.GetNumInfo();
    bool bSameRule = rPrevInfo.GetNumRule() == rInfo.GetNumRule();
@@ -95,43 +94,6 @@ SwHTMLWriter& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt,
        return rWrt;
    }

    if (rWrt.mbXHTML && !rInfo.IsNumbered())
    {
        // If the list only consists of non-numbered text nodes, then don't start the list.
        bool bAtLeastOneNumbered = false;
        SwNodeOffset nPos = rWrt.m_pCurrentPam->GetPoint()->GetNodeIndex() + 1;
        SwNumRule* pNumRule = nullptr;
        while (true)
        {
            const SwNode* pNode = rWrt.m_pDoc->GetNodes()[nPos];
            if (!pNode->IsTextNode())
            {
                break;
            }

            const SwTextNode* pTextNode = pNode->GetTextNode();
            if (!pTextNode->GetNumRule() || (pNumRule && pTextNode->GetNumRule() != pNumRule))
            {
                // Node is not in the same numbering as the previous one.
                break;
            }

            pNumRule = pTextNode->GetNumRule();
            if (pTextNode->IsNumbered())
            {
                bAtLeastOneNumbered = true;
                break;
            }
            ++nPos;
        }

        rAtLeastOneNumbered = bAtLeastOneNumbered;
        if (!bAtLeastOneNumbered)
        {
            return rWrt;
        }
    }

    bool bStartValue = false;
    if( !bSameRule && rInfo.GetDepth() )
    {
@@ -322,42 +284,8 @@ SwHTMLWriter& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt,
    bool bListEnd = !bSameRule || rNextInfo.GetDepth() < rInfo.GetDepth() || rNextInfo.IsRestart(rInfo);
    bool bNextIsSubitem = !bListEnd && rNextInfo.GetDepth() > rInfo.GetDepth();

    std::optional<bool> oAtLeastOneNumbered;
    if (!rInfo.IsNumbered())
    {
        oAtLeastOneNumbered = false;
        SwNodeOffset nPos = rWrt.m_pCurrentPam->GetPoint()->GetNodeIndex() - 1;
        SwNumRule* pNumRule = nullptr;
        while (true)
        {
            const SwNode* pNode = rWrt.m_pDoc->GetNodes()[nPos];
            if (!pNode->IsTextNode())
            {
                break;
            }

            const SwTextNode* pTextNode = pNode->GetTextNode();
            if (!pTextNode->GetNumRule() || (pNumRule && pTextNode->GetNumRule() != pNumRule))
            {
                // Node is not in the same numbering as the next one.
                break;
            }

            pNumRule = pTextNode->GetNumRule();
            if (pTextNode->IsNumbered())
            {
                oAtLeastOneNumbered = true;
                break;
            }
            --nPos;
        }
    }

    // The list is numbered if the previous text node is numbered or any other previous text
    // node is numbered.
    bool bPrevIsNumbered = rInfo.IsNumbered() || *oAtLeastOneNumbered;
    // XHTML </li> for the list item content, if there is an open <li>.
    if ((bListEnd && bPrevIsNumbered) || (!bListEnd && !bNextIsSubitem && rNextInfo.IsNumbered()))
    if (bListEnd || (!bNextIsSubitem && rNextInfo.IsNumbered()))
    {
        HTMLOutFuncs::Out_AsciiTag(
            rWrt.Strm(), Concat2View(rWrt.GetNamespace() + OOO_STRING_SVTOOLS_HTML_li),
@@ -369,15 +297,6 @@ SwHTMLWriter& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt,
        return rWrt;
    }

    if (rWrt.mbXHTML && !rInfo.IsNumbered())
    {
        // If the list only consisted of non-numbered text nodes, then don't end the list.
        if (!*oAtLeastOneNumbered)
        {
            return rWrt;
        }
    }

    OSL_ENSURE( rWrt.m_nLastParaToken == HtmlTokenId::NONE,
                "<PRE> was not closed before </OL>." );
    sal_uInt16 nNextDepth =
diff --git a/sw/source/filter/html/wrthtml.hxx b/sw/source/filter/html/wrthtml.hxx
index 801b903..00d8372 100644
--- a/sw/source/filter/html/wrthtml.hxx
+++ b/sw/source/filter/html/wrthtml.hxx
@@ -716,8 +716,7 @@ SwHTMLWriter& OutCSS1_NumberBulletListStyleOpt( SwHTMLWriter& rWrt, const SwNumR
                                    sal_uInt8 nLevel );

SwHTMLWriter& OutHTML_NumberBulletListStart( SwHTMLWriter& rWrt,
                                 const SwHTMLNumRuleInfo& rInfo,
                                 bool& rAtLeastOneNumbered );
                                 const SwHTMLNumRuleInfo& rInfo );
SwHTMLWriter& OutHTML_NumberBulletListEnd( SwHTMLWriter& rWrt,
                               const SwHTMLNumRuleInfo& rNextInfo );