tdf#139627 Test justified Arabic/Persian text to avoid gaps/big overlaps
With the patches 3901e02..62ff105 from Khaled, many problems with the
justified Arabic/Persian text which were related to Kashida are fixed.
Here, we add a test for tdf#139627 which converts the example file to
the PDF format, then checks the width and the position of the characters
in the output to make sure:
* The characters are present in the PDF file in the correct order
* The characters are joined together
* The diacritic mark is positioned correctly
* The overlapping of the tatweel character is not more than 10% of the
first character
Sample odt file uses "Noto Arabic Sans" font, which is available via
LibreOffice on all platforms.
One may run the test with:
make CPPUNIT_TEST_NAME=testTdf139627 -sr CppunitTest_vcl_pdfexport
Change-Id: I7a826a1b43ee842978decb0cf9a5e2a3b7219982
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138328
Tested-by: Jenkins
Reviewed-by: Hossein <hossein@libreoffice.org>
diff --git a/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt
new file mode 100644
index 0000000..6ca6ad1
--- /dev/null
+++ b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt
Binary files differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 057773c..4b25202 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -3535,6 +3535,90 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testBitmapScaledown)
}
} // end anonymous namespace
CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf139627)
{
aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
saveAsPDF(u"justified-arabic-kashida.odt");
std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = parseExport();
CPPUNIT_ASSERT(pPdfDocument);
// The document has one page.
CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());
std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(/*nIndex=*/0);
CPPUNIT_ASSERT(pPdfPage);
// 7 or 8 objects, 4 text, others are path
int nPageObjectCount = pPdfPage->getObjectCount();
CPPUNIT_ASSERT_GREATEREQUAL(7, nPageObjectCount);
// 4 text objects, "رم" (reh+mim), then "ِ" (kasreh), tatweel, and "ج" (jeh)
OUString sText[4];
/* With "Noto Sans Arabic" font, these are the X ranges on Linux:
0: ( 61.75 - 218.35)
1: (479.70 - 520.02)
2: (209.40 - 457.08)
3: (447.80 - 546.62)
*/
basegfx::B2DRectangle aRect[4];
std::unique_ptr<vcl::pdf::PDFiumTextPage> pTextPage = pPdfPage->getTextPage();
std::unique_ptr<vcl::pdf::PDFiumPageObject> pPageObject;
int nTextObjectCount = 0;
for (int i = 0; i < nPageObjectCount; ++i)
{
pPageObject = pPdfPage->getObject(i);
CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
if (pPageObject->getType() == vcl::pdf::PDFPageObjectType::Text)
{
sText[nTextObjectCount] = pPageObject->getText(pTextPage);
aRect[nTextObjectCount] = pPageObject->getBounds();
++nTextObjectCount;
}
}
CPPUNIT_ASSERT_EQUAL(4, nTextObjectCount);
// Text: جِـرم (which means "mass" in Persian)
// Rendered as (left to right): "reh + mim" - "tahtweel" - "kasreh" - "jeh"
int rehmim = 0, kasreh = 1, tatweel = 2, jeh = 3;
// Bad rendering can cause tatweel enumerated before kasreh
// This can be the end of journey, but let's accept this for now
if (sText[2].equals(u"ِ"))
{
tatweel = 1;
kasreh = 2;
}
CPPUNIT_ASSERT_EQUAL(OUString(u"رم"), sText[rehmim].trim());
CPPUNIT_ASSERT_EQUAL(OUString(u"ِ"), sText[kasreh].trim());
CPPUNIT_ASSERT_EQUAL(OUString(u""), sText[tatweel].trim());
CPPUNIT_ASSERT_EQUAL(OUString(u"ج"), sText[jeh].trim());
// "Kasreh" should be within "jeh" character
CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[kasreh].getMinX());
CPPUNIT_ASSERT_LESS(aRect[jeh].getMaxX(), aRect[kasreh].getMaxX());
// "Tatweel" should cover "jeh" and "reh"+"mim" to avoid gap
// Checking right gap
CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[tatweel].getMaxX());
// Checking left gap
// Kashida fails to reach to rehmim before the series of patches starting
// with 3901e029bd39575f700e69a73818565d62226a23. The visible sypotom is
// a gap in the left of Kashida.
// CPPUNIT_ASSERT_LESS(aRect[rehmim].getMaxX(), aRect[tatweel].getMinX());
// Overlappings of Kashida and surrounding characters is ~9% of the width
// of the "jeh" character, while using "Noto Arabic Sans" font in this
// specific example.
// We set the hard limit of 10% here.
CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[jeh].getMinX() - aRect[tatweel].getMaxX())
/ aRect[jeh].getWidth());
CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[rehmim].getMaxX() - aRect[tatweel].getMinX())
/ aRect[jeh].getWidth());
}
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */