tdf#139627 Test justified Arabic/Persian text to avoid gaps/big overlaps

With the patches 3901e02..62ff105 from Khaled, many problems with the
justified Arabic/Persian text which were related to Kashida are fixed.
Here, we add a test for tdf#139627 which converts the example file to
the PDF format, then checks the width and the position of the characters
in the output to make sure:

* The characters are present in the PDF file in the correct order
* The characters are joined together
* The diacritic mark is positioned correctly
* The overlap between the tatweel character and each of its neighbours is
  less than 10% of the width of the first character ("jeh")

The sample ODT file uses the "Noto Sans Arabic" font, which is available
via LibreOffice on all platforms.

One may run the test with:

    make CPPUNIT_TEST_NAME=testTdf139627 -sr CppunitTest_vcl_pdfexport

Change-Id: I7a826a1b43ee842978decb0cf9a5e2a3b7219982
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/138328
Tested-by: Jenkins
Reviewed-by: Hossein <hossein@libreoffice.org>
diff --git a/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt
new file mode 100644
index 0000000..6ca6ad1
--- /dev/null
+++ b/vcl/qa/cppunit/pdfexport/data/justified-arabic-kashida.odt
Binary files differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 057773c..4b25202 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -3535,6 +3535,90 @@ CPPUNIT_TEST_FIXTURE(PdfExportTest, testBitmapScaledown)
}
} // end anonymous namespace

// Regression test for tdf#139627: exports a justified Arabic/Persian sample
// document to PDF and inspects the text objects on the page to verify that
// the characters come out in the expected order, that the diacritic sits
// inside its base character, and that the kashida (tatweel) touches its
// neighbours without overlapping them by 10% or more of the "jeh" width.
CPPUNIT_TEST_FIXTURE(PdfExportTest, testTdf139627)
{
    aMediaDescriptor["FilterName"] <<= OUString("writer_pdf_Export");
    saveAsPDF(u"justified-arabic-kashida.odt");
    std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = parseExport();
    CPPUNIT_ASSERT(pPdfDocument);

    // The document has one page.
    CPPUNIT_ASSERT_EQUAL(1, pPdfDocument->getPageCount());
    std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(/*nIndex=*/0);
    CPPUNIT_ASSERT(pPdfPage);

    // 7 or 8 objects, 4 text, others are path
    const int nPageObjectCount = pPdfPage->getObjectCount();
    CPPUNIT_ASSERT_GREATEREQUAL(7, nPageObjectCount);

    // 4 text objects, "رم" (reh+mim), then "ِ" (kasreh), tatweel, and "ج" (jeh)
    constexpr int nExpectedTextObjects = 4;
    OUString sText[nExpectedTextObjects];

    /* With "Noto Sans Arabic" font, these are the X ranges on Linux:
        0: ( 61.75 - 218.35)
        1: (479.70 - 520.02)
        2: (209.40 - 457.08)
        3: (447.80 - 546.62)
    */
    basegfx::B2DRectangle aRect[nExpectedTextObjects];

    std::unique_ptr<vcl::pdf::PDFiumTextPage> pTextPage = pPdfPage->getTextPage();
    CPPUNIT_ASSERT(pTextPage);

    // Collect the text and the bounding box of every text object, in the
    // order in which the page enumerates them.
    int nTextObjectCount = 0;
    for (int i = 0; i < nPageObjectCount; ++i)
    {
        std::unique_ptr<vcl::pdf::PDFiumPageObject> pPageObject = pPdfPage->getObject(i);
        CPPUNIT_ASSERT_MESSAGE("no object", pPageObject != nullptr);
        if (pPageObject->getType() != vcl::pdf::PDFPageObjectType::Text)
            continue;

        // Fail cleanly instead of writing past the end of the fixed-size
        // arrays when the export contains more text objects than expected.
        CPPUNIT_ASSERT_MESSAGE("more text objects than expected",
                               nTextObjectCount < nExpectedTextObjects);
        sText[nTextObjectCount] = pPageObject->getText(pTextPage);
        aRect[nTextObjectCount] = pPageObject->getBounds();
        ++nTextObjectCount;
    }
    CPPUNIT_ASSERT_EQUAL(nExpectedTextObjects, nTextObjectCount);

    // Text: جِـرم (which means "mass" in Persian)
    // Rendered as (left to right): "reh + mim" - "tatweel" - "kasreh" - "jeh"
    // The values below are indices into sText/aRect, i.e. enumeration order.
    int rehmim = 0, kasreh = 1, tatweel = 2, jeh = 3;

    // Bad rendering can cause tatweel enumerated before kasreh
    // This can be the end of journey, but let's accept this for now
    // The comparison is done on the trimmed text, like the assertions below,
    // so that padding around the mark cannot hide the swapped order.
    if (sText[2].trim().equals(u"ِ"))
    {
        tatweel = 1;
        kasreh = 2;
    }

    CPPUNIT_ASSERT_EQUAL(OUString(u"رم"), sText[rehmim].trim());
    CPPUNIT_ASSERT_EQUAL(OUString(u"ِ"), sText[kasreh].trim());
    CPPUNIT_ASSERT_EQUAL(OUString(u""), sText[tatweel].trim());
    CPPUNIT_ASSERT_EQUAL(OUString(u"ج"), sText[jeh].trim());

    // Note on the macros used below: CPPUNIT_ASSERT_GREATER(expected, actual)
    // passes when actual > expected, CPPUNIT_ASSERT_LESS(expected, actual)
    // passes when actual < expected.

    // "Kasreh" should be within "jeh" character
    CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[kasreh].getMinX());
    CPPUNIT_ASSERT_LESS(aRect[jeh].getMaxX(), aRect[kasreh].getMaxX());

    // "Tatweel" should cover "jeh" and "reh"+"mim" to avoid gap
    // Checking right gap
    CPPUNIT_ASSERT_GREATER(aRect[jeh].getMinX(), aRect[tatweel].getMaxX());
    // Checking left gap
    // Kashida fails to reach to rehmim before the series of patches starting
    // with 3901e029bd39575f700e69a73818565d62226a23. The visible symptom is
    // a gap in the left of Kashida.
    // CPPUNIT_ASSERT_LESS(aRect[rehmim].getMaxX(), aRect[tatweel].getMinX());

    // Overlappings of Kashida and surrounding characters is ~9% of the width
    // of the "jeh" character, while using "Noto Sans Arabic" font in this
    // specific example.
    // We set the hard limit of 10% here.
    const double fJehWidth = aRect[jeh].getWidth();
    CPPUNIT_ASSERT_LESS(0.1, fabs(aRect[jeh].getMinX() - aRect[tatweel].getMaxX()) / fJehWidth);
    CPPUNIT_ASSERT_LESS(0.1,
                        fabs(aRect[rehmim].getMaxX() - aRect[tatweel].getMinX()) / fJehWidth);
}

// CppUnit macro that provides the test plug-in entry point for this test library.
CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */