tdf#133524 AutoCorrect: support double angle quotes

Add two methods to support double angle quotes,
as part of "Double quotes" replacement:

1. Correct ">>" and "<<" to » and « in several languages,
where double angle quotes are default or alternative
primary or second level quotation marks, but actual
LibreOffice locale settings don't contain double angle
quotes.

2. Correct " to double angle quotes, if the cursor
is inside a primary level quotation (i.e. there
is a preceding primary level opening quote, but no
other quotes). For example, it's possible to type
Hungarian or Romanian quotation marks in

„... »quote« ...”

by pressing only Shift + 2 (") for them. (In these languages,
"Single quotes" replacement is used for apostrophes
and third level quotes instead of the standard second
level quotation marks.)

Change-Id: Icd1584a5a2b81422de693217d2d1f7f3058a74b1
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/95212
Tested-by: Jenkins
Tested-by: László Németh <nemeth@numbertext.org>
Reviewed-by: László Németh <nemeth@numbertext.org>
(cherry picked from commit 57f07b1d7378d218648667c5b1315cc8ad905875)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/95310
Reviewed-by: Adolfo Jayme Barrientos <fitojb@ubuntu.com>
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index ebc0ec8..24a5235 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -259,6 +259,7 @@ bool SvxAutoCorrect::IsAutoCorrectChar( sal_Unicode cChar )
            cChar == '*'  || cChar == '_'  || cChar == '%' ||
            cChar == '.'  || cChar == ','  || cChar == ';' ||
            cChar == ':'  || cChar == '?' || cChar == '!' ||
            cChar == '<'  || cChar == '>' ||
            cChar == '/'  || cChar == '-';
}

@@ -309,6 +310,12 @@ ACFlags SvxAutoCorrect::GetDefaultFlags()
static constexpr sal_Unicode cEmDash = 0x2014;
static constexpr sal_Unicode cEnDash = 0x2013;
static constexpr sal_Unicode cApostrophe = 0x2019;
// U+00AB LEFT-POINTING DOUBLE ANGLE QUOTATION MARK («)
static constexpr sal_Unicode cLeftDoubleAngleQuote = 0xAB;
// U+00BB RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (»)
static constexpr sal_Unicode cRightDoubleAngleQuote = 0xBB;
// stop characters for searching preceding quotes
// (the first character is also the opening quote we are looking for)
// Both arrays are zero-terminated: element [0] is the quote searched for,
// the elements from [1] up to the terminator are the characters that stop the search.
const sal_Unicode aStopDoubleAngleQuoteStart[] = { 0x201E, 0x201D, 0 }; // look for a preceding „ (U+201E), stop at ” (U+201D)
const sal_Unicode aStopDoubleAngleQuoteEnd[] = { cRightDoubleAngleQuote, cLeftDoubleAngleQuote, 0x201D, 0x201E, 0 }; // look for a preceding » (U+00BB), stop at «, ” or „

SvxAutoCorrect::SvxAutoCorrect( const OUString& rShareAutocorrFile,
                                const OUString& rUserAutocorrFile )
@@ -1194,7 +1201,16 @@ void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
                                    sal_Unicode cInsChar, bool bSttQuote,
                                    bool bIns, LanguageType eLang, ACQuotes eType ) const
{
    sal_Unicode cRet = GetQuote( cInsChar, bSttQuote, eLang );
    sal_Unicode cRet;

    if ( eType == ACQuotes::DoubleAngleQuote )
    {
        cRet = ( '<' == cInsChar || ('\"' == cInsChar && !bSttQuote) )
                ? cLeftDoubleAngleQuote
                : cRightDoubleAngleQuote;
    }
    else
        cRet = GetQuote( cInsChar, bSttQuote, eLang );

    OUString sChg( cInsChar );
    if( bIns )
@@ -1213,6 +1229,11 @@ void SvxAutoCorrect::InsertQuote( SvxAutoCorrDoc& rDoc, sal_Int32 nInsPos,
                ++nInsPos;
        }
    }
    else if( eType == ACQuotes::DoubleAngleQuote && cInsChar != '\"' )
    {
        rDoc.Delete( nInsPos-1, nInsPos);
        --nInsPos;
    }

    rDoc.Replace( nInsPos, sChg );

@@ -1242,6 +1263,26 @@ OUString SvxAutoCorrect::GetQuote( SvxAutoCorrDoc const & rDoc, sal_Int32 nInsPo
    return sRet;
}

// Search backwards from the insert position for a preceding opening quote
// in the paragraph text.
//
// rTxt            text to scan
// nPos            insert position; the scan starts at the character before it
// sPrecedingChar  the opening quote we are looking for
// aStopChars      zero-terminated array of characters that end the search
//
// Returns true if sPrecedingChar is found before any of the stop characters.
// Returns false if a stop character is found first, if the start of the text
// is reached, or if nPos is not positive (nothing precedes the position).
static bool lcl_HasPrecedingChar( const OUString& rTxt, sal_Int32 nPos,
                const sal_Unicode sPrecedingChar, const sal_Unicode* aStopChars )
{
    // pre-checked loop: with nPos <= 0 there is no preceding character,
    // and rTxt[ --nPos ] would read before the start of the string
    while ( nPos > 0 )
    {
        const sal_Unicode cTmpChar = rTxt[ --nPos ];
        if ( cTmpChar == sPrecedingChar )
            return true;

        for ( const sal_Unicode* pCh = aStopChars; *pCh; ++pCh )
            if ( cTmpChar == *pCh )
                return false;
    }

    return false;
}

// WARNING: rTxt may become invalid, see comment below
void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nInsPos, sal_Unicode cChar,
@@ -1292,6 +1333,14 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                    {
                        eType = ACQuotes::CapitalizeIAm;
                    }
                    // tdf#133524 support << and >> in Hungarian and Romanian
                    else if ( !bSingle && nInsPos && eLang.anyOf( LANGUAGE_HUNGARIAN, LANGUAGE_ROMANIAN ) &&
                        lcl_HasPrecedingChar( rTxt, nInsPos,
                                bSttQuote ? aStopDoubleAngleQuoteStart[0] : aStopDoubleAngleQuoteEnd[0],
                                bSttQuote ? aStopDoubleAngleQuoteStart + 1 : aStopDoubleAngleQuoteEnd + 1 ) )
                    {
                        eType = ACQuotes::DoubleAngleQuote;
                    }
                }

                if ( eType == ACQuotes::NONE && !bSingle &&
@@ -1301,6 +1350,29 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                InsertQuote( rDoc, nInsPos, cChar, bSttQuote, bInsert, eLang, eType );
                break;
            }
            // tdf#133524 change "<<" and ">>" to double angle quotation marks
            else if ( IsAutoCorrFlag( ACFlags::ChgQuotes ) && ('<' == cChar || '>' == cChar) &&
                nInsPos > 0 && cChar == rTxt[ nInsPos-1 ] )
            {
                const LanguageType eLang = GetDocLanguage( rDoc, nInsPos );
                if ( eLang.anyOf(
                        LANGUAGE_FINNISH,              // alternative primary level
                        LANGUAGE_HUNGARIAN,            // second level
                        LANGUAGE_POLISH,               // second level
                        LANGUAGE_PORTUGUESE,           // primary level
                        LANGUAGE_PORTUGUESE_BRAZILIAN, // primary level
                        LANGUAGE_ROMANIAN,             // second level
                        LANGUAGE_ROMANIAN_MOLDOVA,     // second level
                        LANGUAGE_SWEDISH,              // alternative primary level
                        LANGUAGE_SWEDISH_FINLAND,      // alternative primary level
                        LANGUAGE_UKRAINIAN ) ||        // primary level
                    primary(eLang) == primary(LANGUAGE_GERMAN) ||  // alternative primary level
                    primary(eLang) == primary(LANGUAGE_SPANISH) )  // primary level
                {
                    InsertQuote( rDoc, nInsPos, cChar, false, bInsert, eLang, ACQuotes::DoubleAngleQuote );
                    break;
                }
            }

            if( bInsert )
                rDoc.Insert( nInsPos, OUString(cChar) );
diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx
index 708c25a..c9d9a86 100644
--- a/include/editeng/svxacorr.hxx
+++ b/include/editeng/svxacorr.hxx
@@ -85,6 +85,7 @@ enum class ACQuotes
    NONE,
    NonBreakingSpace,
    CapitalizeIAm,
    DoubleAngleQuote,
};

// TODO: handle code points > U+FFFF and check users of this class
diff --git a/sw/qa/extras/uiwriter/data/tdf133524.fodt b/sw/qa/extras/uiwriter/data/tdf133524.fodt
new file mode 100644
index 0000000..aaa4b88
--- /dev/null
+++ b/sw/qa/extras/uiwriter/data/tdf133524.fodt
@@ -0,0 +1,14 @@
<?xml version="1.0" encoding="UTF-8"?>
<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:style="urn:oasis:names:tc:opendocument:xmlns:style:1.0" xmlns:table="urn:oasis:names:tc:opendocument:xmlns:table:1.0" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" xmlns:fo="urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" xmlns:officeooo="http://openoffice.org/2009/office" xmlns:loext="urn:org:documentfoundation:names:experimental:office:xmlns:loext:1.0" office:version="1.2" office:mimetype="application/vnd.oasis.opendocument.text">
 <office:styles>
  <style:style style:name="Standard" style:family="paragraph" style:class="text"/>
  <style:default-style style:family="paragraph">
   <style:text-properties fo:language="hu" fo:country="HU"/>
  </style:default-style>
 </office:styles>
 <office:body>
  <office:text>
   <text:p text:style-name="Standard"></text:p>
  </office:text>
 </office:body>
</office:document>
diff --git a/sw/qa/extras/uiwriter/uiwriter.cxx b/sw/qa/extras/uiwriter/uiwriter.cxx
index cfbc9a5..9ba5cb9 100644
--- a/sw/qa/extras/uiwriter/uiwriter.cxx
+++ b/sw/qa/extras/uiwriter/uiwriter.cxx
@@ -368,6 +368,7 @@ public:
    void testTdf54409();
    void testTdf38394();
    void testTdf59666();
    void testTdf133524();
    void testInconsistentBookmark();
#if HAVE_FEATURE_PDFIUM
    void testInsertPdf();
@@ -582,6 +583,7 @@ public:
    CPPUNIT_TEST(testTdf54409);
    CPPUNIT_TEST(testTdf38394);
    CPPUNIT_TEST(testTdf59666);
    CPPUNIT_TEST(testTdf133524);
#if HAVE_FEATURE_PDFIUM
    CPPUNIT_TEST(testInsertPdf);
#endif
@@ -7191,6 +7193,53 @@ void SwUiWriterTest::testTdf59666()
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
}

void SwUiWriterTest::testTdf133524()
{
    SwDoc* pDoc = createDoc("tdf133524.fodt");
    SwWrtShell* pWrtShell = pDoc->GetDocShell()->GetWrtShell();
    // 1. Testing autocorrect of >> and <<
    // Example: »word«
    SwAutoCorrect corr(*SvxAutoCorrCfg::Get().GetAutoCorrect());
    // >>
    pWrtShell->Insert(u">");
    pWrtShell->AutoCorrect(corr, '>');
    sal_uLong nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
    OUString sReplaced(u"»");
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
    // <<
    pWrtShell->Insert(u"word<");
    pWrtShell->AutoCorrect(corr, '<');
    nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
    sReplaced += u"word«";
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
    // 2. Testing autocorrect of " to >> and << inside „...”
    // Example: „Sentence and »word«.”
    // opening primary level quote
    pWrtShell->Insert(u" ");
    pWrtShell->AutoCorrect(corr, '"');
    nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
    sReplaced += u" „";
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
    // opening second level quote
    pWrtShell->Insert(u"Sentence and ");
    pWrtShell->AutoCorrect(corr, '"');
    nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
    sReplaced += u"Sentence and »";
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
    // closing second level quote
    pWrtShell->Insert(u"word");
    pWrtShell->AutoCorrect(corr, '"');
    nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
    sReplaced += u"word«";
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
    // closing primary level quote
    pWrtShell->Insert(u".");
    pWrtShell->AutoCorrect(corr, '"');
    nIndex = pWrtShell->GetCursor()->GetNode().GetIndex();
    sReplaced += u".”";
    CPPUNIT_ASSERT_EQUAL(sReplaced, static_cast<SwTextNode*>(pDoc->GetNodes()[nIndex])->GetText());
}

#if HAVE_FEATURE_PDFIUM
void SwUiWriterTest::testInsertPdf()
{