tdf#157103: fix SVG whitespace handling
Previous code tried to hack some tricks to restore whitespaces after
trimming them according to the xml:space attribute value. But it was
wrong in multiple ways.
1. The collapsed space must stay in the block where its start was.
When a block ended with a space, then trimming the space from it,
and adding to a next block, can change the size of the space.
2. The shift of a line (e.g., specifying x and y values) doesn't end
the logical line. A space before such a shift must be kept by the
same rules, as if there weren't a shift.
3. A block with xml:space="preserve" is treated as if it consists of
all non-whitespace characters, even if its leading or trailing
characters are spaces. I.e., a trailing space in a block before,
or a leading space in a block after, should be collapsed into a
single space, not removed - even when the space-preserving block
itself is made invisible.
Change-Id: Ic778d1e9d6b9d0c342ea74ad78d44bb47bc8d708
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/166239
Tested-by: Mike Kaganski <mike.kaganski@collabora.com>
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/svgio/inc/svgcharacternode.hxx b/svgio/inc/svgcharacternode.hxx
index d81066a..20c60d7 100644
--- a/svgio/inc/svgcharacternode.hxx
+++ b/svgio/inc/svgcharacternode.hxx
@@ -38,11 +38,10 @@ namespace svgio::svgreader
/// the string data
OUString maText;
// keep a copy of string data before space handling
OUString maTextBeforeSpaceHandling;
SvgTspanNode* mpParentLine;
bool mbHadTrailingSpace = false;
/// local helpers
rtl::Reference<drawinglayer::primitive2d::BasePrimitive2D> createSimpleTextPrimitive(
SvgTextPosition& rSvgTextPosition,
@@ -65,8 +64,7 @@ namespace svgio::svgreader
virtual const SvgStyleAttributes* getSvgStyleAttributes() const override;
void decomposeText(drawinglayer::primitive2d::Primitive2DContainer& rTarget, SvgTextPosition& rSvgTextPosition) const;
void whiteSpaceHandling();
SvgCharacterNode* addGap(SvgCharacterNode* pPreviousCharacterNode);
SvgCharacterNode* whiteSpaceHandling(SvgCharacterNode* pPreviousCharacterNode);
void concatenate(std::u16string_view rText);
/// Text content
diff --git a/svgio/qa/cppunit/SvgImportTest.cxx b/svgio/qa/cppunit/SvgImportTest.cxx
index 93921d1..e05fe53 100644
--- a/svgio/qa/cppunit/SvgImportTest.cxx
+++ b/svgio/qa/cppunit/SvgImportTest.cxx
@@ -749,7 +749,7 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf85770)
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "height"_ostr, "11");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "familyname"_ostr, "Times New Roman");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "fontcolor"_ostr, "#000000");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, "Start");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, "Start ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "height"_ostr, "11");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "familyname"_ostr, "Times New Roman");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "fontcolor"_ostr, "#000000");
@@ -782,17 +782,17 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf93583)
xmlDocUniquePtr pDocument = dumpAndParseSvg(u"/svgio/qa/cppunit/data/tdf93583.svg");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "This is the");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "x"_ostr, "62");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "x"_ostr, "58");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "y"_ostr, "303");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "width"_ostr, "16");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "height"_ostr, "16");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, " first");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "x"_ostr, "129");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, " first ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "x"_ostr, "125");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "y"_ostr, "303");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "width"_ostr, "32");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "height"_ostr, "32");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "text"_ostr, " line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "x"_ostr, "188");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "text"_ostr, "line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "x"_ostr, "192");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "y"_ostr, "303");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "width"_ostr, "16");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "height"_ostr, "16");
@@ -802,28 +802,28 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf156616)
{
xmlDocUniquePtr pDocument = dumpAndParseSvg(u"/svgio/qa/cppunit/data/tdf156616.svg");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "First");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "First ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "x"_ostr, "114");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "y"_ostr, "103");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, " line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "x"_ostr, "143");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, "line ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "x"_ostr, "147");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "y"_ostr, "103");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "text"_ostr, "Second line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "x"_ostr, "114");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "y"_ostr, "122");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[4]"_ostr, "text"_ostr, "First");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[4]"_ostr, "x"_ostr, "87");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[4]"_ostr, "text"_ostr, "First ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[4]"_ostr, "x"_ostr, "85");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[4]"_ostr, "y"_ostr, "153");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "text"_ostr, " line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "x"_ostr, "116");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "text"_ostr, "line ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "x"_ostr, "118");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "y"_ostr, "153");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[6]"_ostr, "text"_ostr, "Second line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[6]"_ostr, "x"_ostr, "77");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[6]"_ostr, "y"_ostr, "172");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[7]"_ostr, "text"_ostr, "First");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[7]"_ostr, "x"_ostr, "59");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[7]"_ostr, "text"_ostr, "First ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[7]"_ostr, "x"_ostr, "55");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[7]"_ostr, "y"_ostr, "203");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[8]"_ostr, "text"_ostr, " line");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[8]"_ostr, "text"_ostr, "line ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[8]"_ostr, "x"_ostr, "88");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[8]"_ostr, "y"_ostr, "203");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[9]"_ostr, "text"_ostr, "Second line");
@@ -1108,12 +1108,12 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf156251)
// Without the fix in place, this test would have failed with
// - Expected: 'You are '
// - Actual : 'You are'
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "You are");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, " not");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "You are ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, "not");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[3]"_ostr, "text"_ostr, " a banana!");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[4]"_ostr, "text"_ostr, "You are");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "text"_ostr, " not");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[6]"_ostr, "text"_ostr, " a banana!");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[5]"_ostr, "text"_ostr, " not ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[6]"_ostr, "text"_ostr, "a banana!");
}
CPPUNIT_TEST_FIXTURE(Test, testMaskText)
@@ -1546,15 +1546,15 @@ CPPUNIT_TEST_FIXTURE(Test, testTdf156837)
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "x"_ostr, "114");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "y"_ostr, "103");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "height"_ostr, "16");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "x");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "x"_ostr, "122");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[1]"_ostr, "text"_ostr, "x ");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "x"_ostr, "126");
// Without the fix in place, this test would have failed with
// - Expected: 94
// - Actual : 103
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "y"_ostr, "94");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "height"_ostr, "10");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, " 3");
assertXPath(pDocument, "/primitive2D/transform/textsimpleportion[2]"_ostr, "text"_ostr, "3");
}
CPPUNIT_TEST_FIXTURE(Test, testTdf156271)
diff --git a/svgio/source/svgreader/svgcharacternode.cxx b/svgio/source/svgreader/svgcharacternode.cxx
index 7fdac9e..0aea197 100644
--- a/svgio/source/svgreader/svgcharacternode.cxx
+++ b/svgio/source/svgreader/svgcharacternode.cxx
@@ -486,53 +486,80 @@ namespace svgio::svgreader
}
}
void SvgCharacterNode::whiteSpaceHandling()
SvgCharacterNode*
SvgCharacterNode::whiteSpaceHandling(SvgCharacterNode* pPreviousCharacterNode)
{
bool bIsDefault(XmlSpace::Default == getXmlSpace());
// if xml:space="default" then remove all newline characters, otherwise convert them to space
// convert tab to space too
maText = maTextBeforeSpaceHandling = maText.replaceAll(u"\n", bIsDefault ? u"" : u" ").replaceAll(u"\t", u" ");
maText = maText.replaceAll(u"\n", bIsDefault ? u"" : u" ").replaceAll(u"\t", u" ");
if(bIsDefault)
if (!bIsDefault)
{
// strip of all leading and trailing spaces
// and consolidate contiguous space
maText = consolidateContiguousSpace(maText.trim());
}
}
SvgCharacterNode* SvgCharacterNode::addGap(SvgCharacterNode* pPreviousCharacterNode)
{
// maText may have lost all text. If that's the case, ignore as invalid character node
// Also ignore if maTextBeforeSpaceHandling just have spaces
if(!maText.isEmpty() && !o3tl::trim(maTextBeforeSpaceHandling).empty())
{
if(pPreviousCharacterNode)
if (maText.isEmpty())
{
bool bAddGap(true);
// Do not add a gap if last node doesn't end with a space and
// current note doesn't start with a space
const sal_uInt32 nLastLength(pPreviousCharacterNode->maTextBeforeSpaceHandling.getLength());
if(pPreviousCharacterNode->maTextBeforeSpaceHandling[nLastLength - 1] != ' ' && maTextBeforeSpaceHandling[0] != ' ')
bAddGap = false;
// Do not add a gap if this node and last node are in different lines
if(pPreviousCharacterNode->mpParentLine != mpParentLine)
bAddGap = false;
// add in-between whitespace (single space) to the beginning of the current character node
if(bAddGap)
{
maText = " " + maText;
}
// Ignore this empty node for the purpose of whitespace handling
return pPreviousCharacterNode;
}
// this becomes the previous character node
if (pPreviousCharacterNode && pPreviousCharacterNode->mbHadTrailingSpace)
{
// pPreviousCharacterNode->mbHadTrailingSpace implies its xml:space="default".
// Even if this xml:space="preserve" node is whitespace-only, the trailing space
// of the previous node is significant - restore it
pPreviousCharacterNode->maText += " ";
}
return this;
}
return pPreviousCharacterNode;
bool bHadLeadingSpace = maText.startsWith(" ");
mbHadTrailingSpace = maText.endsWith(" "); // Only set for xml:space="default"
// strip of all leading and trailing spaces
// and consolidate contiguous space
maText = consolidateContiguousSpace(maText.trim());
if (pPreviousCharacterNode)
{
if (pPreviousCharacterNode->mbHadTrailingSpace)
{
// pPreviousCharacterNode->mbHadTrailingSpace implies its xml:space="default".
// The previous node already has a pending trailing space.
if (maText.isEmpty())
{
// Leading spaces in this empty node are insignificant.
// Ignore this empty node for the purpose of whitespace handling
return pPreviousCharacterNode;
}
// The previous node's trailing space is significant - restore it. Note that
// it is incorrect to insert a space in this node instead: the spaces in
// different nodes may have different size
pPreviousCharacterNode->maText += " ";
return this;
}
if (bHadLeadingSpace)
{
// This possibly whitespace-only xml:space="default" node goes after another
// node either having xml:space="default", but without a trailing space; or
// having xml:space="preserve" (in that case, it's irrelevant if that node had
// any trailing spaces).
if (!maText.isEmpty())
{
// The leading whitespace in this node is significant - restore it
maText = " " + maText;
}
// The trailing whitespace in this node may or may not be
// significant (it will be significant, if there will be more nodes). Keep it as
// it is (even empty), but return this, to participate in whitespace handling
return this;
}
}
// No previous node, or no leading/trailing space on the previous node's boundary: if
// this is whitespace-only, its whitespace is never significant
return maText.isEmpty() ? pPreviousCharacterNode : this;
}
void SvgCharacterNode::concatenate(std::u16string_view rText)
diff --git a/svgio/source/svgreader/svgdocumenthandler.cxx b/svgio/source/svgreader/svgdocumenthandler.cxx
index 85a5444..b74e981 100644
--- a/svgio/source/svgreader/svgdocumenthandler.cxx
+++ b/svgio/source/svgreader/svgdocumenthandler.cxx
@@ -70,7 +70,46 @@ namespace svgio::svgreader
namespace
{
svgio::svgreader::SvgCharacterNode* whiteSpaceHandling(svgio::svgreader::SvgNode const * pNode, svgio::svgreader::SvgTspanNode* pParentLine, svgio::svgreader::SvgCharacterNode* pLast)
using CharacterNodeHandlerFunc
= svgio::svgreader::SvgCharacterNode*(svgio::svgreader::SvgCharacterNode* pCharNode,
svgio::svgreader::SvgTspanNode* pParentLine,
svgio::svgreader::SvgCharacterNode* pLast);
// clean whitespace in text span
svgio::svgreader::SvgCharacterNode* whiteSpaceHandling(svgio::svgreader::SvgCharacterNode* pCharNode,
svgio::svgreader::SvgTspanNode* pParentLine,
svgio::svgreader::SvgCharacterNode* pLast)
{
pCharNode->setParentLine(pParentLine);
return pCharNode->whiteSpaceHandling(pLast);
}
// set correct widths of text lines
svgio::svgreader::SvgCharacterNode* calcTextLineWidths(svgio::svgreader::SvgCharacterNode* pCharNode,
svgio::svgreader::SvgTspanNode* pParentLine,
svgio::svgreader::SvgCharacterNode* /*pLast*/)
{
if (const SvgStyleAttributes* pSvgStyleAttributes = pCharNode->getSvgStyleAttributes())
{
const drawinglayer::attribute::FontAttribute aFontAttribute(
svgio::svgreader::SvgCharacterNode::getFontAttribute(*pSvgStyleAttributes));
double fFontWidth(pSvgStyleAttributes->getFontSizeNumber().solve(*pCharNode));
double fFontHeight(fFontWidth);
css::lang::Locale aLocale;
drawinglayer::primitive2d::TextLayouterDevice aTextLayouterDevice;
aTextLayouterDevice.setFontAttribute(aFontAttribute, fFontWidth, fFontHeight, aLocale);
double fTextWidth = aTextLayouterDevice.getTextWidth(pCharNode->getText(), 0.0,
pCharNode->getText().getLength());
pParentLine->concatenateTextLineWidth(fTextWidth);
}
return nullptr; // no pLast handling
}
svgio::svgreader::SvgCharacterNode* walkRecursive(svgio::svgreader::SvgNode const* pNode,
svgio::svgreader::SvgTspanNode* pParentLine,
svgio::svgreader::SvgCharacterNode* pLast,
CharacterNodeHandlerFunc* pHandlerFunc)
{
if(pNode)
{
@@ -87,34 +126,9 @@ namespace
{
case SVGToken::Character:
{
// clean whitespace in text span
svgio::svgreader::SvgCharacterNode* pCharNode = static_cast< svgio::svgreader::SvgCharacterNode* >(pCandidate);
pCharNode->setParentLine(pParentLine);
pCharNode->whiteSpaceHandling();
pLast = pCharNode->addGap(pLast);
double fTextWidth(0.0);
const SvgStyleAttributes* pSvgStyleAttributes = pCharNode->getSvgStyleAttributes();
if(pSvgStyleAttributes)
{
const drawinglayer::attribute::FontAttribute aFontAttribute(
svgio::svgreader::SvgCharacterNode::getFontAttribute(*pSvgStyleAttributes));
double fFontWidth(pSvgStyleAttributes->getFontSizeNumber().solve(*pCharNode));
double fFontHeight(fFontWidth);
css::lang::Locale aLocale;
drawinglayer::primitive2d::TextLayouterDevice aTextLayouterDevice;
aTextLayouterDevice.setFontAttribute(aFontAttribute, fFontWidth, fFontHeight, aLocale);
fTextWidth = aTextLayouterDevice.getTextWidth(pCharNode->getText(), 0.0, pCharNode->getText().getLength());
}
pParentLine->concatenateTextLineWidth(fTextWidth);
pLast = pHandlerFunc(pCharNode, pParentLine, pLast);
break;
}
case SVGToken::Tspan:
@@ -125,15 +139,15 @@ namespace
if(!pTspanNode->getX().empty() || !pTspanNode->getY().empty())
pParentLine = pTspanNode;
// recursively clean whitespaces in subhierarchy
pLast = whiteSpaceHandling(pCandidate, pParentLine, pLast);
// recursively handle subhierarchy
pLast = walkRecursive(pCandidate, pParentLine, pLast, pHandlerFunc);
break;
}
case SVGToken::TextPath:
case SVGToken::Tref:
{
// recursively clean whitespaces in subhierarchy
pLast = whiteSpaceHandling(pCandidate, pParentLine, pLast);
// recursively handle subhierarchy
pLast = walkRecursive(pCandidate, pParentLine, pLast, pHandlerFunc);
break;
}
default:
@@ -548,7 +562,12 @@ namespace
if(pTextNode)
{
// cleanup read strings
whiteSpaceHandling(pTextNode, static_cast< SvgTspanNode*>(pTextNode), nullptr);
// First pass: handle whitespace. This works in a way that handling a following
// node may append a space to a previous node; so correct line width calculation
// may only happen after this pass finishes
walkRecursive(pTextNode, static_cast<SvgTspanNode*>(pTextNode), nullptr, whiteSpaceHandling);
// Second pass: calculate line widths
walkRecursive(pTextNode, static_cast<SvgTspanNode*>(pTextNode), nullptr, calcTextLineWidths);
}
}