Fix everything using XCharacterClassification::getStringType() and don't use it
See note in offapi/com/sun/star/i18n/XCharacterClassification.idl
The brain-dead implementation is useless, but it is API ... its use in
isAlphaNumericType() and similar never returned what would have
been expected.
Change-Id: I278f2468182dab94c32273ef69cf9634bc002cb4
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/139809
Reviewed-by: Eike Rathke <erack@redhat.com>
Tested-by: Jenkins
diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx
index c76c7ae..7cb35ba 100644
--- a/include/unotools/charclass.hxx
+++ b/include/unotools/charclass.hxx
@@ -31,28 +31,29 @@
namespace com::sun::star::uno { class XComponentContext; }
namespace com::sun::star::i18n { class XCharacterClassification; }
/// Bits that mark a character type as alphabetic: upper, lower or title case.
inline constexpr sal_Int32 nCharClassAlphaType =
    css::i18n::KCharacterType::UPPER |
    css::i18n::KCharacterType::LOWER |
    css::i18n::KCharacterType::TITLE_CASE;

/// Every bit an alphabetic character type is allowed to carry.
inline constexpr sal_Int32 nCharClassAlphaTypeMask =
    nCharClassAlphaType |
    css::i18n::KCharacterType::LETTER |    // Alpha is also always a LETTER
    css::i18n::KCharacterType::PRINTABLE |
    css::i18n::KCharacterType::BASE_FORM;

/// Bits that mark a character type as a letter: any alphabetic bit or LETTER.
inline constexpr sal_Int32 nCharClassLetterType =
    nCharClassAlphaType |
    css::i18n::KCharacterType::LETTER;

/// Every bit a letter character type is allowed to carry; checked by
/// isLetterType().
inline constexpr sal_Int32 nCharClassLetterTypeMask =
    nCharClassAlphaTypeMask |
    css::i18n::KCharacterType::LETTER;

/// Bit that marks a character type as numeric.
inline constexpr sal_Int32 nCharClassNumericType =
    css::i18n::KCharacterType::DIGIT;

/// Every bit a numeric character type is allowed to carry; checked by
/// isNumericType().
inline constexpr sal_Int32 nCharClassNumericTypeMask =
    nCharClassNumericType |
    css::i18n::KCharacterType::PRINTABLE |
    css::i18n::KCharacterType::BASE_FORM;
@@ -86,14 +87,14 @@ public:
/// isalpha() on ascii values of entire string
static bool isAsciiAlpha( std::u16string_view rStr );
/// Whether a type, e.g. the return of getCharacterType(), is purely numeric:
/// at least one numeric bit is set and no bit outside the numeric mask is set.
static bool isNumericType( sal_Int32 nType )
{
    const bool bHasNumericBit = (nType & nCharClassNumericType) != 0;
    const bool bNoForeignBit = (nType & ~nCharClassNumericTypeMask) == 0;
    return bHasNumericBit && bNoForeignBit;
}
/// whether type is pure alphanumeric or not, e.g. return of getStringType
/// whether type is pure alphanumeric or not, e.g. return of getCharacterType()
static bool isAlphaNumericType( sal_Int32 nType )
{
return ((nType & (nCharClassAlphaType |
@@ -102,14 +103,14 @@ public:
nCharClassNumericTypeMask)) == 0);
}
/// Whether a type, e.g. the return of getCharacterType(), is purely a letter:
/// at least one letter bit is set and no bit outside the letter mask is set.
static bool isLetterType( sal_Int32 nType )
{
    const bool bHasLetterBit = (nType & nCharClassLetterType) != 0;
    const bool bNoForeignBit = (nType & ~nCharClassLetterTypeMask) == 0;
    return bHasLetterBit && bNoForeignBit;
}
/// whether type is pure letternumeric or not, e.g. return of getStringType
/// whether type is pure letternumeric or not, e.g. return of getCharacterType()
static bool isLetterNumericType( sal_Int32 nType )
{
return ((nType & (nCharClassLetterType |
@@ -141,7 +142,6 @@ public:
css::i18n::DirectionProperty getCharacterDirection( const OUString& rStr, sal_Int32 nPos ) const;
css::i18n::UnicodeScript getScript( const OUString& rStr, sal_Int32 nPos ) const;
sal_Int32 getCharacterType( const OUString& rStr, sal_Int32 nPos ) const;
sal_Int32 getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const;
css::i18n::ParseResult parseAnyToken(
const OUString& rStr,
@@ -167,10 +167,13 @@ public:
bool isDigit( const OUString& rStr, sal_Int32 nPos ) const;
bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const;
bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const;
bool isUpper( const OUString& rStr, sal_Int32 nPos ) const;
bool isLetter( const OUString& rStr ) const;
bool isNumeric( const OUString& rStr ) const;
bool isLetterNumeric( const OUString& rStr ) const;
bool isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const;
private:
const css::lang::Locale & getMyLocale() const;
diff --git a/linguistic/source/misc.cxx b/linguistic/source/misc.cxx
index dc4c17c..c315ad1 100644
--- a/linguistic/source/misc.cxx
+++ b/linguistic/source/misc.cxx
@@ -558,9 +558,7 @@ uno::Reference< XHyphenatedWord > RebuildHyphensAndControlChars(
/// Whether the range of rText that starts at nPos and spans nLen UTF-16 code
/// units is upper case for the given language.
///
/// @param rText      text to inspect
/// @param nPos       start index into rText
/// @param nLen       number of code units to inspect
/// @param nLanguage  language whose character classification is applied
/// @return result of CharClass::isUpper() for that range
bool IsUpper( const OUString &rText, sal_Int32 nPos, sal_Int32 nLen, LanguageType nLanguage )
{
    CharClass aCC(( LanguageTag( nLanguage ) ));
    // Ask per character instead of evaluating the combined flags of
    // getStringType(), which do not give a dependable answer for a string.
    return aCC.isUpper( rText, nPos, nLen );
}
CapType capitalType(const OUString& aTerm, CharClass const * pCC)
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index ace153d..4573687 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -134,7 +134,14 @@ bool CharClass::isLetter( const OUString& rStr ) const
{
try
{
return isLetterType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
sal_Int32 nPos = 0;
while (nPos < rStr.getLength())
{
if (!isLetter( rStr, nPos))
return false;
rStr.iterateCodePoints( &nPos);
}
return true;
}
catch ( const Exception& )
{
@@ -165,7 +172,14 @@ bool CharClass::isNumeric( const OUString& rStr ) const
{
try
{
return isNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
sal_Int32 nPos = 0;
while (nPos < rStr.getLength())
{
if (!isDigit( rStr, nPos))
return false;
rStr.iterateCodePoints( &nPos);
}
return true;
}
catch ( const Exception& )
{
@@ -183,7 +197,7 @@ bool CharClass::isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const
try
{
return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
(nCharClassAlphaType | KCharacterType::DIGIT)) != 0;
(nCharClassAlphaType | nCharClassNumericType)) != 0;
}
catch ( const Exception& )
{
@@ -201,7 +215,7 @@ bool CharClass::isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const
try
{
return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) &
(nCharClassLetterType | KCharacterType::DIGIT)) != 0;
(nCharClassLetterType | nCharClassNumericType)) != 0;
}
catch ( const Exception& )
{
@@ -214,7 +228,53 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
{
try
{
return isLetterNumericType( xCC->getStringType( rStr, 0, rStr.getLength(), getMyLocale() ) );
sal_Int32 nPos = 0;
while (nPos < rStr.getLength())
{
if (!isLetterNumeric( rStr, nPos))
return false;
rStr.iterateCodePoints( &nPos);
}
return true;
}
catch ( const Exception& )
{
TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
}
return false;
}
bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
{
sal_Unicode c = rStr[nPos];
if ( c < 128 )
return rtl::isAsciiUpperCase(c);
try
{
return (xCC->getCharacterType( rStr, nPos, getMyLocale()) &
KCharacterType::UPPER) != 0;
}
catch ( const Exception& )
{
TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
}
return false;
}
bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
try
{
assert(nPos >= 0 && nCount >= 0);
sal_Int32 nLen = std::min( nPos + nCount, rStr.getLength());
while (nPos < nLen)
{
if (!isUpper( rStr, nPos))
return false;
rStr.iterateCodePoints( &nPos);
}
return true;
}
catch ( const Exception& )
{
@@ -314,19 +374,6 @@ sal_Int32 CharClass::getCharacterType( const OUString& rStr, sal_Int32 nPos ) co
return 0;
}
/// Forwards to the classification service's getStringType() for nCount
/// positions of rStr starting at nPos, using this object's locale.
/// Yields 0 if the service call throws; the exception is logged as a warning.
sal_Int32 CharClass::getStringType( const OUString& rStr, sal_Int32 nPos, sal_Int32 nCount ) const
{
    sal_Int32 nType = 0;
    try
    {
        nType = xCC->getStringType( rStr, nPos, nCount, getMyLocale() );
    }
    catch ( const Exception& )
    {
        TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
    }
    return nType;
}
css::i18n::ParseResult CharClass::parseAnyToken(
const OUString& rStr,
sal_Int32 nPos,