tdf#96343, tdf#134766, tdf#97152: Fallback to ICU for case mapping

If we are requested to case map a character that is not present in our
case mapping data, fall back to the ICU case mapping functions.

We should switch completely to ICU at some point, but first we need to
evaluate our case mapping data to see whether it differs from ICU's
and, if so, whether there is a reason for the difference.

This does not handle the case from tdf#97152 where U+03F2 turns into
Sigma.

Change-Id: Icf13ac7aab6d07b2a90fc0ff5ef1c4f50c7a7f8c
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/154803
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
diff --git a/i18npool/qa/cppunit/test_characterclassification.cxx b/i18npool/qa/cppunit/test_characterclassification.cxx
index dc8b361..5b01f73 100644
--- a/i18npool/qa/cppunit/test_characterclassification.cxx
+++ b/i18npool/qa/cppunit/test_characterclassification.cxx
@@ -115,6 +115,108 @@ CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testSigma)
    }
}

// tdf#96343: checks toLower/toUpper/toTitle on the sample text from the bug
// report, starting in turn from its upper-, lower- and title-case form.
CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf96343)
{
    // The same sample text in its three case forms.
    const OUString aUpper(u"ꙊꙌꙖ");
    const OUString aLower(u"ꙋꙍꙗ");
    const OUString aTitle(u"Ꙋꙍꙗ");

    // Starting point: upper case. Lower it, then map the result back up.
    {
        const OUString aLowered = m_xCC->toLower(aUpper, 0, aUpper.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);

        const OUString aRaised = m_xCC->toUpper(aLowered, 0, aLowered.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);
    }

    // Starting point: lower case. Title and upper it, then lower the
    // upper-cased result again.
    {
        const OUString aTitled = m_xCC->toTitle(aLower, 0, aLower.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", aTitle, aTitled);

        const OUString aRaised = m_xCC->toUpper(aLower, 0, aLower.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);

        const OUString aLowered = m_xCC->toLower(aRaised, 0, aRaised.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);
    }

    // Starting point: title case. Every mapping is applied to the
    // title-cased text directly.
    {
        const OUString aTitled = m_xCC->toTitle(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", aTitle, aTitled);

        const OUString aRaised = m_xCC->toUpper(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);

        const OUString aLowered = m_xCC->toLower(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);
    }
}

// tdf#134766: checks toLower/toUpper/toTitle on the sample word from the bug
// report, starting in turn from its upper-, lower- and title-case form.
CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf134766)
{
    // The same sample word in its three case forms.
    const OUString aUpper(u"QꞋORBꞋAL");
    const OUString aLower(u"qꞌorbꞌal");
    const OUString aTitle(u"Qꞌorbꞌal");

    // Starting point: upper case. Lower it, then map the result back up.
    {
        const OUString aLowered = m_xCC->toLower(aUpper, 0, aUpper.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);

        const OUString aRaised = m_xCC->toUpper(aLowered, 0, aLowered.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);
    }

    // Starting point: lower case. Title and upper it, then lower the
    // upper-cased result again.
    {
        const OUString aTitled = m_xCC->toTitle(aLower, 0, aLower.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", aTitle, aTitled);

        const OUString aRaised = m_xCC->toUpper(aLower, 0, aLower.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);

        const OUString aLowered = m_xCC->toLower(aRaised, 0, aRaised.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);
    }

    // Starting point: title case. Every mapping is applied to the
    // title-cased text directly.
    {
        const OUString aTitled = m_xCC->toTitle(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", aTitle, aTitled);

        const OUString aRaised = m_xCC->toUpper(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);

        const OUString aLowered = m_xCC->toLower(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);
    }
}

// tdf#97152: checks toLower and toTitle on the sample text from the bug
// report, starting in turn from its upper-, lower- and title-case form.
//
// NOTE(review): every toUpper check below is disabled. Presumably this is
// because U+03F2 is still upper-cased to Sigma (the part of tdf#97152 that
// remains open) -- confirm before re-enabling them.
CPPUNIT_TEST_FIXTURE(TestCharacterClassification, testTdf97152)
{
    // The same sample text in its three case forms.
    const OUString aUpper(u"ͲͰϽϾϿͿϏϹ");
    const OUString aLower(u"ͳͱͻͼͽϳϗϲ");
    const OUString aTitle(u"Ͳͱͻͼͽϳϗϲ");

    // Starting point: upper case. Only the lowering is verified.
    {
        const OUString aLowered = m_xCC->toLower(aUpper, 0, aUpper.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);

        // Disabled: map the lowered text back to upper case.
        //const OUString aRaised = m_xCC->toUpper(aLowered, 0, aLowered.getLength(), {});
        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);
    }

    // Starting point: lower case. Only the title-casing is verified.
    {
        const OUString aTitled = m_xCC->toTitle(aLower, 0, aLower.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", aTitle, aTitled);

        // Disabled: upper-case the text, then lower the result again.
        //const OUString aRaised = m_xCC->toUpper(aLower, 0, aLower.getLength(), {});
        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);
        //const OUString aLowered = m_xCC->toLower(aRaised, 0, aRaised.getLength(), {});
        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);
    }

    // Starting point: title case. Title-casing and lowering are verified.
    {
        const OUString aTitled = m_xCC->toTitle(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be title", aTitle, aTitled);

        // Disabled: upper-case the title-cased text.
        //const OUString aRaised = m_xCC->toUpper(aTitle, 0, aTitle.getLength(), {});
        //CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be upper", aUpper, aRaised);

        const OUString aLowered = m_xCC->toLower(aTitle, 0, aTitle.getLength(), {});
        CPPUNIT_ASSERT_EQUAL_MESSAGE("Should be lower", aLower, aLowered);
    }
}

void TestCharacterClassification::setUp()
{
    BootstrapFixtureBase::setUp();
diff --git a/i18nutil/source/utility/casefolding.cxx b/i18nutil/source/utility/casefolding.cxx
index 432de0b..d4f7992 100644
--- a/i18nutil/source/utility/casefolding.cxx
+++ b/i18nutil/source/utility/casefolding.cxx
@@ -26,6 +26,8 @@
#include <com/sun/star/uno/RuntimeException.hpp>
#include <rtl/character.hxx>

#include <unicode/uchar.h>

using namespace com::sun::star::lang;
using namespace com::sun::star::uno;

@@ -125,10 +127,45 @@ Mapping casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 l
                    // Should not come here
                    throw RuntimeException();
                }
            } else
            }
            else
            {
                dummy.map[0] = CaseMappingValue[address].value;
                return dummy;
            }
        }
    }

    // If the code point is not supported by our case mapping tables,
    // fall back to ICU functions.
    // TODO: this does not handle special case mapping as these require
    // using ustring.h APIs, which work on the whole string not character
    // by character.
    // TODO: what is the difference between ToLower and UpperToLower etc.?
    sal_uInt32 value = 0;
    switch (nMappingType)
    {
        case MappingType::ToLower:
        case MappingType::UpperToLower:
            value = u_tolower(c);
            break;
        case MappingType::ToUpper:
        case MappingType::LowerToUpper:
            value = u_toupper(c);
            break;
        case MappingType::ToTitle:
            value = u_totitle(c);
            break;
        case MappingType::SimpleFolding:
        case MappingType::FullFolding:
            value = u_foldCase(c, U_FOLD_CASE_DEFAULT);
            break;
        default: break;
    }

    if (value && value != c)
        dummy.nmap = rtl::splitSurrogates(value, dummy.map);

    return dummy;
}