ofz#5566 sw: HTML import: ignore <DIV> in table structure elements

Looking at the HTML4 DTD https://www.w3.org/TR/html4/sgml/dtd.html,
inside TABLE only various elements defining the structure of the table
are allowed, except inside cells (TD and TH elements).

A DIV in a table but outside cells may cause cursor positions to go
off the rails, so it is better to ignore such invalid DIV tags.

Reviewed-on: https://gerrit.libreoffice.org/48359
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
(cherry picked from commit 8b1a83bffe35ae0e71735569512c1586bcb37b25)

Change-Id: Ia6195d80670631669c252d572242874b13642b74
Reviewed-on: https://gerrit.libreoffice.org/48381
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
diff --git a/sw/source/filter/html/htmltab.cxx b/sw/source/filter/html/htmltab.cxx
index b6e1d5d..b3f7ca1 100644
--- a/sw/source/filter/html/htmltab.cxx
+++ b/sw/source/filter/html/htmltab.cxx
@@ -19,6 +19,7 @@

#include <memory>
#include <hintids.hxx>
#include <comphelper/flagguard.hxx>
#include <vcl/svapp.hxx>
#include <vcl/wrkwin.hxx>
#include <editeng/boxitem.hxx>
@@ -3307,6 +3308,7 @@ void SwHTMLParser::BuildTableCell( HTMLTable *pCurTable, bool bReadOptions,
    if( !IsParserWorking() && !m_pPendStack )
        return;

    ::comphelper::FlagRestorationGuard g(m_isInTableStructure, false);
    CellSaveStruct* pSaveStruct;

    HtmlTokenId nToken = HtmlTokenId::NONE;
@@ -4972,6 +4974,7 @@ HTMLTable *SwHTMLParser::BuildTable( SvxAdjust eParentAdjust,
    if( !IsParserWorking() && !m_pPendStack )
        return nullptr;

    ::comphelper::FlagRestorationGuard g(m_isInTableStructure, true);
    HtmlTokenId nToken = HtmlTokenId::NONE;
    bool bPending = false;
    TableSaveStruct* pSaveStruct;
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index 3b18ef3..e483f92 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -298,6 +298,7 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn,
    m_bRemoveHidden( false ),
    m_bBodySeen( false ),
    m_bReadingHeaderOrFooter( false ),
    m_isInTableStructure(false),
    m_pTempViewFrame(nullptr)
{
    m_nEventId = nullptr;
@@ -1544,26 +1545,32 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
    // divisions
    case HtmlTokenId::DIVISION_ON:
    case HtmlTokenId::CENTER_ON:
        if( m_nOpenParaToken != HtmlTokenId::NONE )
        if (!m_isInTableStructure)
        {
            if( IsReadPRE() )
                m_nOpenParaToken = HtmlTokenId::NONE;
            else
                EndPara();
            if (m_nOpenParaToken != HtmlTokenId::NONE)
            {
                if (IsReadPRE())
                    m_nOpenParaToken = HtmlTokenId::NONE;
                else
                    EndPara();
            }
            NewDivision( nToken );
        }
        NewDivision( nToken );
        break;

    case HtmlTokenId::DIVISION_OFF:
    case HtmlTokenId::CENTER_OFF:
        if( m_nOpenParaToken != HtmlTokenId::NONE )
        if (!m_isInTableStructure)
        {
            if( IsReadPRE() )
                m_nOpenParaToken = HtmlTokenId::NONE;
            else
                EndPara();
            if (m_nOpenParaToken != HtmlTokenId::NONE)
            {
                if (IsReadPRE())
                    m_nOpenParaToken = HtmlTokenId::NONE;
                else
                    EndPara();
            }
            EndDivision();
        }
        EndDivision();
        break;

    case HtmlTokenId::MULTICOL_ON:
diff --git a/sw/source/filter/html/swhtml.hxx b/sw/source/filter/html/swhtml.hxx
index 450f7dd..6e84380 100644
--- a/sw/source/filter/html/swhtml.hxx
+++ b/sw/source/filter/html/swhtml.hxx
@@ -480,6 +480,7 @@ class SwHTMLParser : public SfxHTMLParser, public SwClient

    bool m_bBodySeen : 1;
    bool m_bReadingHeaderOrFooter : 1;
    bool m_isInTableStructure;

    /// the names corresponding to the DOCINFO field subtypes INFO[1-4]
    OUString m_InfoNames[4];