Fix everything using XCharacterClassification::getStringType() and don't use it

See note in offapi/com/sun/star/i18n/XCharacterClassification.idl

The brain-dead implementation is useless, but it is API ... its use in
isAlphaNumericType() and similar never returned what would have
been expected.

Change-Id: I278f2468182dab94c32273ef69cf9634bc002cb4
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139809
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx
index c76c7ae..7cb35ba 100644
--- a/include/unotools/charclass.hxx
+++ b/include/unotools/charclass.hxx
@@ -31,28 +31,29 @@
namespace com::sun::star::uno { class XComponentContext; }
namespace com::sun::star::i18n { class XCharacterClassification; }

const sal_Int32 nCharClassAlphaType =
inline constexpr sal_Int32 nCharClassAlphaType =
    css::i18n::KCharacterType::UPPER |
    css::i18n::KCharacterType::LOWER |
    css::i18n::KCharacterType::TITLE_CASE;

const sal_Int32 nCharClassAlphaTypeMask =
inline constexpr sal_Int32 nCharClassAlphaTypeMask =
    nCharClassAlphaType |
    css::i18n::KCharacterType::LETTER |     // Alpha is also always a LETTER
    css::i18n::KCharacterType::PRINTABLE |
    css::i18n::KCharacterType::BASE_FORM;

const sal_Int32 nCharClassLetterType =
inline constexpr sal_Int32 nCharClassLetterType =
    nCharClassAlphaType |
    css::i18n::KCharacterType::LETTER;

const sal_Int32 nCharClassLetterTypeMask =
inline constexpr sal_Int32 nCharClassLetterTypeMask =
    nCharClassAlphaTypeMask |
    css::i18n::KCharacterType::LETTER;

const sal_Int32 nCharClassNumericType =
inline constexpr sal_Int32 nCharClassNumericType =
    css::i18n::KCharacterType::DIGIT;

const sal_Int32 nCharClassNumericTypeMask =
inline constexpr sal_Int32 nCharClassNumericTypeMask =
    nCharClassNumericType |
    css::i18n::KCharacterType::PRINTABLE |
    css::i18n::KCharacterType::BASE_FORM;
@@ -86,14 +87,14 @@ public:
    /// isalpha() on ascii values of entire string
    static bool isAsciiAlpha( std::u16string_view rStr );

    /// whether type is pure numeric or not, e.g. return of getStringType
    /// whether type is pure numeric or not, e.g. return of getCharacterType()
    static bool isNumericType( sal_Int32 nType )
    {
        return ((nType & nCharClassNumericType) != 0) &&
            ((nType & ~nCharClassNumericTypeMask) == 0);
    }

    /// whether type is pure alphanumeric or not, e.g. return of getStringType
    /// whether type is pure alphanumeric or not, e.g. return of getCharacterType()
    static bool isAlphaNumericType( sal_Int32 nType )
    {
        return ((nType & (nCharClassAlphaType |
@@ -102,14 +103,14 @@ public:
            nCharClassNumericTypeMask)) == 0);
    }

    /// whether type is pure letter or not, e.g. return of getStringType
    /// whether type is pure letter or not, e.g. return of getCharacterType()
    static bool isLetterType( sal_Int32 nType )
    {
        return ((nType & nCharClassLetterType) != 0) &&
            ((nType & ~nCharClassLetterTypeMask) == 0);
    }

    /// whether type is pure letternumeric or not, e.g. return of getStringType
    /// whether type is pure letternumeric or not, e.g. return of getCharacterType()
    static bool isLetterNumericType( sal_Int32 nType )
    {
        return ((nType & (nCharClassLetterType |
@@ -141,7 +142,6 @@ public:
    css::i18n::DirectionProperty getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const;
    css::i18n::UnicodeScript getScript( const OUString& rStr, sal_Int32 nPos ) const;
    sal_Int32 getCharacterType( const OUString& rStr, sal_Int32 nPos ) const;
    sal_Int32 getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const;

    css::i18n::ParseResult parseAnyToken(
                                    const OUString& rStr,
@@ -167,10 +167,13 @@ public:
    bool isDigit( const OUString& rStr, sal_Int32 nPos ) const;
    bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const;
    bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const;
    bool isUpper( const OUString& rStr, sal_Int32 nPos ) const;
    bool isLetter( const OUString& rStr ) const;
    bool isNumeric( const OUString& rStr ) const;
    bool isLetterNumeric( const OUString& rStr ) const;

    bool isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const;

private:

    const css::lang::Locale &  getMyLocale() const;
diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx
index dc4c17c..c315ad1 100644
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@@ -558,9 +558,7 @@ uno::Reference< XHyphenatedWord > RebuildHyphensAndControlChars(
bool IsUpper( const OUString &rText, sal_Int32 nPos, sal_Int32 nLen, LanguageType nLanguage )
{
    CharClass aCC(( LanguageTag( nLanguage ) ));
    sal_Int32 nFlags = aCC.getStringType( rText, nPos, nLen );
    return      (nFlags & KCharacterType::UPPER)
            && !(nFlags & KCharacterType::LOWER);
    return aCC.isUpper( rText, nPos, nLen );
}

CapType capitalType(const OUString& aTerm, CharClass const * pCC)
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index ace153d..4573687 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -134,7 +134,14 @@ bool CharClass::isLetter( const OUString& rStr ) const
{
    try
    {
        return isLetterType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
        sal_Int32 nPos = 0;
        while (nPos < rStr.getLength())
        {
            if (!isLetter( rStr, nPos))
                return false;
            rStr.iterateCodePoints( &nPos);
        }
        return true;
    }
    catch ( const Exception& )
    {
@@ -165,7 +172,14 @@ bool CharClass::isNumeric( const OUString& rStr ) const
{
    try
    {
        return isNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
        sal_Int32 nPos = 0;
        while (nPos < rStr.getLength())
        {
            if (!isDigit( rStr, nPos))
                return false;
            rStr.iterateCodePoints( &nPos);
        }
        return true;
    }
    catch ( const Exception& )
    {
@@ -183,7 +197,7 @@ bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const
    try
    {
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
                (nCharClassAlphaType | KCharacterType::DIGIT)) != 0;
                (nCharClassAlphaType | nCharClassNumericType)) != 0;
    }
    catch ( const Exception& )
    {
@@ -201,7 +215,7 @@ bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const
    try
    {
        return  (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
                (nCharClassLetterType | KCharacterType::DIGIT)) != 0;
                (nCharClassLetterType | nCharClassNumericType)) != 0;
    }
    catch ( const Exception& )
    {
@@ -214,7 +228,53 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
{
    try
    {
        return isLetterNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
        sal_Int32 nPos = 0;
        while (nPos < rStr.getLength())
        {
            if (!isLetterNumeric( rStr, nPos))
                return false;
            rStr.iterateCodePoints( &nPos);
        }
        return true;
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return false;
}

bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
{
    sal_Unicode c = rStr[nPos];
    if ( c < 128 )
        return rtl::isAsciiUpperCase(c);

    try
    {
        return (xCC->getCharacterType( rStr, nPos, getMyLocale()) &
                KCharacterType::UPPER) != 0;
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return false;
}

bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    try
    {
        assert(nPos >= 0 && nCount >= 0);
        sal_Int32 nLen = std::min( nPos + nCount, rStr.getLength());
        while (nPos < nLen)
        {
            if (!isUpper( rStr, nPos))
                return false;
            rStr.iterateCodePoints( &nPos);
        }
        return true;
    }
    catch ( const Exception& )
    {
@@ -314,19 +374,6 @@ sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) co
    return 0;
}

sal_Int32 CharClass::getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    try
    {
        return xCC->getStringType( rStr, nPos, nCount, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return 0;
}

css::i18n::ParseResult CharClass::parseAnyToken(
            const OUString& rStr,
            sal_Int32 nPos,