tdf#142437: Fix word boundary detection in autocorrect
Marks (combining and spacing) were incorrectly treated as word separators,
because isLetterNumeric() matches only letters and numbers.
The new isBase() matches any character with the BASE_FORM character type, which covers letters, numbers, and marks.
Change-Id: I27ec2f7fb8d360791a280d10aba9b6d16e7cfb71
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153509
Tested-by: Jenkins
Reviewed-by: خالد حسني <khaled@libreoffice.org>
(cherry picked from commit caab94a3e0387bde05538cff91ff13446f330785)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153517
diff --git a/include/unotools/charclass.hxx b/include/unotools/charclass.hxx
index 7cb35ba..bc91aca 100644
--- a/include/unotools/charclass.hxx
+++ b/include/unotools/charclass.hxx
@@ -58,6 +58,9 @@ inline constexpr sal_Int32 nCharClassNumericTypeMask =
css::i18n::KCharacterType::PRINTABLE |
css::i18n::KCharacterType::BASE_FORM;
// Bit mask selecting the BASE_FORM flag of css::i18n::KCharacterType.
// CharClass::isBase() tests a character's type bits against this mask.
inline constexpr sal_Int32 nCharClassBaseType =
css::i18n::KCharacterType::BASE_FORM;
class UNOTOOLS_DLLPUBLIC CharClass
{
LanguageTag maLanguageTag;
@@ -167,6 +170,7 @@ public:
bool isDigit( const OUString& rStr, sal_Int32 nPos ) const;
bool isAlphaNumeric( const OUString& rStr, sal_Int32 nPos ) const;
bool isLetterNumeric( const OUString& rStr, sal_Int32 nPos ) const;
bool isBase( const OUString& rStr, sal_Int32 nPos ) const;
bool isUpper( const OUString& rStr, sal_Int32 nPos ) const;
bool isLetter( const OUString& rStr ) const;
bool isNumeric( const OUString& rStr ) const;
diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx
index 2c9eb69a..010e0b2 100644
--- a/sw/source/core/edit/autofmt.cxx
+++ b/sw/source/core/edit/autofmt.cxx
@@ -2110,7 +2110,7 @@ void SwAutoFormat::AutoCorrect(TextFrameIndex nPos)
bFirstSent = true;
[[fallthrough]];
default:
if (!(rAppCC.isLetterNumeric(*pText, sal_Int32(nPos))
if (!(rAppCC.isBase(*pText, sal_Int32(nPos))
|| '/' == cChar )) // '/' should not be a word separator (e.g. '1/2' needs to be handled as one word for replacement)
{
--nPos; // revert ++nPos which was decremented in for loop
diff --git a/unotools/source/i18n/charclass.cxx b/unotools/source/i18n/charclass.cxx
index be3a9f4..423f953 100644
--- a/unotools/source/i18n/charclass.cxx
+++ b/unotools/source/i18n/charclass.cxx
@@ -253,6 +253,23 @@ bool CharClass::isLetterNumeric( const OUString& rStr ) const
return false;
}
bool CharClass::isBase( const OUString& rStr, sal_Int32 nPos ) const
{
sal_Unicode c = rStr[nPos];
if ( c < 128 )
return rtl::isAsciiAlphanumeric( c );
try
{
return (xCC->getCharacterType( rStr, nPos, getMyLocale() ) & nCharClassBaseType ) != 0;
}
catch ( const Exception& )
{
TOOLS_WARN_EXCEPTION("unotools.i18n", "" );
}
return false;
}
bool CharClass::isUpper( const OUString& rStr, sal_Int32 nPos ) const
{
sal_Unicode c = rStr[nPos];