tdf#151546: RTL text is reversed (Writer pdfimport)
This is a followup to commit 69e9925ded584113e52f84ef0ed7c224079fa061
for the fix of tdf#104597.
The Writer pdf import filter code is similar to the Draw part.
However, many fixes made to the Draw part were historically not applied to the Writer part.
This patch ports the fix of text run in the Draw part to the Writer
part. There is a todo related to continuous spaces issue which should
be fixed separately.
Also use CPPUNIT_ASSERT_EQUAL_MESSAGE to report the xml output
instead of using std::cout, so that it is shown in case of unit test failure.
Change-Id: Id013700524750e6e5283d85eeab72d8075f16f1b
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141420
Tested-by: Thorsten Behrens <thorsten.behrens@allotropia.de>
Reviewed-by: Thorsten Behrens <thorsten.behrens@allotropia.de>
(cherry picked from commit f6004e1c457ddab5e0c91e6159875d25130b108a)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/141927
Tested-by: Jenkins
Reviewed-by: Xisco Fauli <xiscofauli@libreoffice.org>
diff --git a/sdext/source/pdfimport/test/tests.cxx b/sdext/source/pdfimport/test/tests.cxx
index a520569..fe2f659 100644
--- a/sdext/source/pdfimport/test/tests.cxx
+++ b/sdext/source/pdfimport/test/tests.cxx
@@ -793,36 +793,54 @@ namespace
new OutputWrapString(aOutput),
nullptr));
// std::cout << aOutput << std::endl;
xmlDocUniquePtr pXmlDoc(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput.getStr())));
// Test for امُ عَلَيْكَ
// TODO: How to get the "عَلَيْكَ" in xpath, as shown after the <text:s> tag?
OString xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 14821.9583333333 2159.23861112778)']/draw:text-box/text:p/text:span";
OUString sContent = getXPathContent(pXmlDoc, xpath); // u"\nا\nُ\nم\n"
CPPUNIT_ASSERT_EQUAL(OUString(u"اُم"), sContent.replaceAll("\n", ""));
CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"اُم"), sContent.replaceAll("\n", ""));
// Test for ٱلَّسَل . It appears in the 3rd frame, i.e. after the امُ عَلَيْكَ which is in the 2nd frame (from left to right)
// thus these two frames together appear as ٱلَّسَل امُ عَلَيْكَ in Draw.
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 17420.1666666667 2159.23861112778)']/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
CPPUNIT_ASSERT_EQUAL(OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"ٱلَّسَل"), sContent.replaceAll("\n", ""));
// Test for "LibreOffice LTR"
// TODO: How to get the "LTR" as shown after the <text:s> tag?
xpath = "//draw:frame[@draw:transform='matrix(917.222222222222 0 0 917.222222222222 12779.375 5121.79583335)']/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
CPPUNIT_ASSERT_EQUAL(OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"LibreOffice"), sContent.replaceAll("\n", ""));
/* Test for Chinese characters */
// Use last() instead of matrix below, because the matrix may be different on different OS due to fallback of Chinese fonts.
xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc, xpath);
CPPUNIT_ASSERT_EQUAL(OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
// Test pdf text run in the Writer PDF import filter
xAdaptor->setTreeVisitorFactory(createWriterTreeVisitorFactory());
OString aOutput2;
xAdaptor->odfConvert(m_directories.getURLFromSrc(u"/sdext/source/pdfimport/test/testdocs/tdf104597_textrun.pdf"),
new OutputWrapString(aOutput2),
nullptr);
// FIXME: the same draw:frame is duplicated in the xml output,
// e.g. there are two draw:frame with draw:z-index="3" with the same content.
xmlDocUniquePtr pXmlDoc2(xmlParseDoc(reinterpret_cast<xmlChar const *>(aOutput2.getStr())));
xpath = "//draw:frame[@draw:z-index='3'][1]/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"ٱلَّسَل"), sContent);
xpath = "//draw:frame[@draw:z-index='2'][1]/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc2, xpath).replaceAll("\n", "");
// need to use اُم rather than اُم َعَلْيَك here, because this node may be different on different systems
CPPUNIT_ASSERT_EQUAL(true, sContent.match(u"اُم"));
xpath = "//draw:frame[last()]/draw:text-box/text:p/text:span";
sContent = getXPathContent(pXmlDoc2, xpath);
CPPUNIT_ASSERT_EQUAL_MESSAGE(aOutput2.getStr(), OUString(u"中文测试,中文"), sContent.replaceAll("\n", ""));
#endif
}
CPPUNIT_TEST_SUITE(PDFITest);
CPPUNIT_TEST(testXPDFParser);
CPPUNIT_TEST(testOdfWriterExport);
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.cxx b/sdext/source/pdfimport/tree/writertreevisiting.cxx
index a3d66f3..af16cde 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.cxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.cxx
@@ -31,12 +31,28 @@
#include <basegfx/polygon/b2dpolypolygontools.hxx>
#include <osl/diagnose.h>
#include <com/sun/star/i18n/CharacterClassification.hpp>
#include <com/sun/star/i18n/DirectionProperty.hpp>
#include <comphelper/string.hxx>
using namespace ::com::sun::star;
using namespace ::com::sun::star::lang;
using namespace ::com::sun::star::i18n;
using namespace ::com::sun::star::uno;
namespace pdfi
{
// Returns the character classification service held in mxCharClass.
// The service is created on first use from the component context stored
// in the emit context, and the same reference is handed out afterwards.
const Reference< XCharacterClassification >& WriterXmlEmitter::GetCharacterClassification()
{
if ( mxCharClass.is() )
return mxCharClass; // already created by an earlier call

Reference< XComponentContext > xComponentContext( m_rEmitContext.m_xContext, uno::UNO_SET_THROW );
mxCharClass = CharacterClassification::create(xComponentContext);
return mxCharClass;
}
void WriterXmlEmitter::visit( HyperlinkElement& elem, const std::list< std::unique_ptr<Element> >::const_iterator& )
{
if( elem.Children.empty() )
@@ -72,8 +88,31 @@ void WriterXmlEmitter::visit( TextElement& elem, const std::list< std::unique_pt
m_rEmitContext.rStyles.getStyleName( elem.StyleId );
}
OUString str(elem.Text.toString());
// Check for RTL
bool isRTL = false;
Reference< i18n::XCharacterClassification > xCC( GetCharacterClassification() );
if( xCC.is() )
{
for(int i=1; i< elem.Text.getLength(); i++)
{
i18n::DirectionProperty nType = static_cast<i18n::DirectionProperty>(xCC->getCharacterDirection( str, i ));
if ( nType == i18n::DirectionProperty_RIGHT_TO_LEFT ||
nType == i18n::DirectionProperty_RIGHT_TO_LEFT_ARABIC ||
nType == i18n::DirectionProperty_RIGHT_TO_LEFT_EMBEDDING ||
nType == i18n::DirectionProperty_RIGHT_TO_LEFT_OVERRIDE
)
isRTL = true;
}
}
if (isRTL) // If so, reverse string
str = ::comphelper::string::reverseString(str);
m_rEmitContext.rEmitter.beginTag( "text:span", aProps );
m_rEmitContext.rEmitter.write( elem.Text.makeStringAndClear() );
// TODO: reserve continuous spaces, see DrawXmlEmitter::visit( TextElement& elem...)
m_rEmitContext.rEmitter.write(str);
auto this_it = elem.Children.begin();
while( this_it != elem.Children.end() && this_it->get() != &elem )
{
@@ -797,13 +836,12 @@ void WriterXmlOptimizer::optimizeTextElements(Element& rParent)
}
}
// concatenate consecutive text elements unless there is a
// font or text color or matrix change, leave a new span in that case
// font or text color change, leave a new span in that case
if( pCur->FontId == pNext->FontId &&
rCurGC.FillColor.Red == rNextGC.FillColor.Red &&
rCurGC.FillColor.Green == rNextGC.FillColor.Green &&
rCurGC.FillColor.Blue == rNextGC.FillColor.Blue &&
rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha &&
rCurGC.Transformation == rNextGC.Transformation
rCurGC.FillColor.Alpha == rNextGC.FillColor.Alpha
)
{
pCur->updateGeometryWith( pNext );
diff --git a/sdext/source/pdfimport/tree/writertreevisiting.hxx b/sdext/source/pdfimport/tree/writertreevisiting.hxx
index 1c1507f..e473c27 100644
--- a/sdext/source/pdfimport/tree/writertreevisiting.hxx
+++ b/sdext/source/pdfimport/tree/writertreevisiting.hxx
@@ -24,6 +24,8 @@
#include <pdfihelper.hxx>
#include <com/sun/star/i18n/XCharacterClassification.hpp>
namespace pdfi
{
struct DrawElement;
@@ -80,12 +82,14 @@ namespace pdfi
class WriterXmlEmitter : public ElementTreeVisitor
{
private:
css::uno::Reference< css::i18n::XCharacterClassification > mxCharClass;
EmitContext& m_rEmitContext ;
static void fillFrameProps( DrawElement& rElem,
PropertyMap& rProps,
const EmitContext& rEmitContext );
public:
const css::uno::Reference<css::i18n::XCharacterClassification >& GetCharacterClassification();
explicit WriterXmlEmitter(EmitContext& rEmitContext) :
m_rEmitContext(rEmitContext)
{}