ofz#5566 sw: HTML import: ignore <DIV> in table structure elements
Looking at the HTML4 DTD https://www.w3.org/TR/html4/sgml/dtd.html,
inside TABLE only the various elements defining the structure of the
table are allowed, except inside cells (TD and TH elements).
A DIV inside a table but outside its cells may cause cursor positions to
go off the rails, so it is better to ignore such invalid DIV tags (and
CENTER tags, which share the same handling).
Change-Id: Ia6195d80670631669c252d572242874b13642b74
Reviewed-on: https://gerrit.libreoffice.org/48359
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Caolán McNamara <caolanm@redhat.com>
Tested-by: Caolán McNamara <caolanm@redhat.com>
diff --git a/sw/source/filter/html/htmltab.cxx b/sw/source/filter/html/htmltab.cxx
index d2fff4c..8ed2ca3 100644
--- a/sw/source/filter/html/htmltab.cxx
+++ b/sw/source/filter/html/htmltab.cxx
@@ -19,6 +19,7 @@
#include <memory>
#include <hintids.hxx>
#include <comphelper/flagguard.hxx>
#include <vcl/svapp.hxx>
#include <vcl/wrkwin.hxx>
#include <editeng/boxitem.hxx>
@@ -3133,6 +3134,7 @@ void SwHTMLParser::BuildTableCell( HTMLTable *pCurTable, bool bReadOptions,
if( !IsParserWorking() && !m_pPendStack )
return;
::comphelper::FlagRestorationGuard g(m_isInTableStructure, false);
std::unique_ptr<CellSaveStruct> xSaveStruct;
HtmlTokenId nToken = HtmlTokenId::NONE;
@@ -4946,6 +4948,7 @@ std::shared_ptr<HTMLTable> SwHTMLParser::BuildTable(SvxAdjust eParentAdjust,
if (!IsParserWorking() && !m_pPendStack)
return std::shared_ptr<HTMLTable>();
::comphelper::FlagRestorationGuard g(m_isInTableStructure, true);
HtmlTokenId nToken = HtmlTokenId::NONE;
bool bPending = false;
std::unique_ptr<TableSaveStruct> xSaveStruct;
diff --git a/sw/source/filter/html/swhtml.cxx b/sw/source/filter/html/swhtml.cxx
index aad7989..4d939b4 100644
--- a/sw/source/filter/html/swhtml.cxx
+++ b/sw/source/filter/html/swhtml.cxx
@@ -300,6 +300,7 @@ SwHTMLParser::SwHTMLParser( SwDoc* pD, SwPaM& rCursor, SvStream& rIn,
m_bRemoveHidden( false ),
m_bBodySeen( false ),
m_bReadingHeaderOrFooter( false ),
m_isInTableStructure(false),
m_nTableDepth( 0 ),
m_pTempViewFrame(nullptr)
{
@@ -1557,26 +1558,32 @@ void SwHTMLParser::NextToken( HtmlTokenId nToken )
// divisions
case HtmlTokenId::DIVISION_ON:
case HtmlTokenId::CENTER_ON:
if( m_nOpenParaToken != HtmlTokenId::NONE )
if (!m_isInTableStructure)
{
if( IsReadPRE() )
m_nOpenParaToken = HtmlTokenId::NONE;
else
EndPara();
if (m_nOpenParaToken != HtmlTokenId::NONE)
{
if (IsReadPRE())
m_nOpenParaToken = HtmlTokenId::NONE;
else
EndPara();
}
NewDivision( nToken );
}
NewDivision( nToken );
break;
case HtmlTokenId::DIVISION_OFF:
case HtmlTokenId::CENTER_OFF:
if( m_nOpenParaToken != HtmlTokenId::NONE )
if (!m_isInTableStructure)
{
if( IsReadPRE() )
m_nOpenParaToken = HtmlTokenId::NONE;
else
EndPara();
if (m_nOpenParaToken != HtmlTokenId::NONE)
{
if (IsReadPRE())
m_nOpenParaToken = HtmlTokenId::NONE;
else
EndPara();
}
EndDivision();
}
EndDivision();
break;
case HtmlTokenId::MULTICOL_ON:
diff --git a/sw/source/filter/html/swhtml.hxx b/sw/source/filter/html/swhtml.hxx
index cd43c8f..9674a88 100644
--- a/sw/source/filter/html/swhtml.hxx
+++ b/sw/source/filter/html/swhtml.hxx
@@ -492,6 +492,7 @@ class SwHTMLParser : public SfxHTMLParser, public SwClient
bool m_bBodySeen : 1;
bool m_bReadingHeaderOrFooter : 1;
bool m_isInTableStructure;
sal_Int32 m_nTableDepth;