tdf#145925: Add DOI recognition

Detect DOI strings of the form "doi:10.*" and turn them into hyperlinks.
This works the same way as URL recognition.

Change-Id: I3c4e78a110fd81ad7e727d5e9acee7e51127466a
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/150954
Tested-by: Jenkins
Reviewed-by: Heiko Tietze <heiko.tietze@documentfoundation.org>
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
diff --git a/cui/inc/strings.hrc b/cui/inc/strings.hrc
index 498e60e..442ac51 100644
--- a/cui/inc/strings.hrc
+++ b/cui/inc/strings.hrc
@@ -332,6 +332,7 @@
#define RID_CUISTR_BOLD_UNDER                       NC_("RID_SVXSTR_BOLD_UNDER", "Automatic *bold*, /italic/, -strikeout- and _underline_")
#define RID_CUISTR_NO_DBL_SPACES                    NC_("RID_SVXSTR_NO_DBL_SPACES", "Ignore double spaces")
#define RID_CUISTR_DETECT_URL                       NC_("RID_SVXSTR_DETECT_URL", "URL Recognition")
#define RID_CUISTR_DETECT_DOI                       NC_("RID_SVXSTR_DETECT_DOI", "DOI citation recognition")
#define RID_CUISTR_DASH                             NC_("RID_SVXSTR_DASH", "Replace dashes")
#define RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK     NC_("RID_SVXSTR_CORRECT_ACCIDENTAL_CAPS_LOCK", "Correct accidental use of cAPS LOCK key")
#define RID_CUISTR_NON_BREAK_SPACE                  NC_("RID_SVXSTR_NON_BREAK_SPACE", "Add non-breaking space before specific punctuation marks in French text")
diff --git a/cui/source/inc/autocdlg.hxx b/cui/source/inc/autocdlg.hxx
index 7b48423..2357677 100644
--- a/cui/source/inc/autocdlg.hxx
+++ b/cui/source/inc/autocdlg.hxx
@@ -57,6 +57,7 @@ private:
    OUString m_sStartCap;
    OUString m_sBoldUnderline;
    OUString m_sURL;
    OUString m_sDOI;
    OUString m_sNoDblSpaces;
    OUString m_sDash;
    OUString m_sAccidentalCaps;
@@ -92,6 +93,7 @@ class OfaSwAutoFmtOptionsPage : public SfxTabPage
    OUString        sNoDblSpaces;
    OUString        sCorrectCapsLock;
    OUString        sDetectURL;
    OUString        sDetectDOI;
    OUString        sDash;
    OUString        sRightMargin;
    OUString        sNum;
diff --git a/cui/source/tabpages/autocdlg.cxx b/cui/source/tabpages/autocdlg.cxx
index aea3f7d..38a261a 100644
--- a/cui/source/tabpages/autocdlg.cxx
+++ b/cui/source/tabpages/autocdlg.cxx
@@ -189,6 +189,7 @@ OfaAutocorrOptionsPage::OfaAutocorrOptionsPage(weld::Container* pPage, weld::Dia
    , m_sStartCap(CuiResId(RID_CUISTR_CPTL_STT_SENT))
    , m_sBoldUnderline(CuiResId(RID_CUISTR_BOLD_UNDER))
    , m_sURL(CuiResId(RID_CUISTR_DETECT_URL))
    , m_sDOI(CuiResId(RID_CUISTR_DETECT_DOI))
    , m_sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES))
    , m_sDash(CuiResId(RID_CUISTR_DASH))
    , m_sAccidentalCaps(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK))
@@ -223,6 +224,7 @@ bool OfaAutocorrOptionsPage::FillItemSet( SfxItemSet* )
    pAutoCorrect->SetAutoCorrFlag(ACFlags::CapitalStartSentence, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
    pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgWeightUnderl,      m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
    pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr,          m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
    pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr,           m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
    pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgToEnEmDash,        m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
    pAutoCorrect->SetAutoCorrFlag(ACFlags::IgnoreDoubleSpace,    m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
    pAutoCorrect->SetAutoCorrFlag(ACFlags::CorrectCapsLock,      m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
@@ -263,6 +265,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* )
    InsertEntry(m_sStartCap);
    InsertEntry(m_sBoldUnderline);
    InsertEntry(m_sURL);
    InsertEntry(m_sDOI);
    InsertEntry(m_sDash);
    InsertEntry(m_sNoDblSpaces);
    InsertEntry(m_sAccidentalCaps);
@@ -273,6 +276,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* )
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CapitalStartSentence) ? TRISTATE_TRUE : TRISTATE_FALSE );
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE );
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetINetAttr) ? TRISTATE_TRUE : TRISTATE_FALSE );
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetDOIAttr) ? TRISTATE_TRUE : TRISTATE_FALSE );
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE );
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::IgnoreDoubleSpace) ? TRISTATE_TRUE : TRISTATE_FALSE );
    m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CorrectCapsLock) ? TRISTATE_TRUE : TRISTATE_FALSE );
@@ -333,6 +337,7 @@ enum OfaAutoFmtOptions
    BEGIN_UPPER,
    BOLD_UNDERLINE,
    DETECT_URL,
    DETECT_DOI,
    REPLACE_DASHES,
    DEL_SPACES_AT_STT_END,
    DEL_SPACES_BETWEEN_LINES,
@@ -363,6 +368,7 @@ OfaSwAutoFmtOptionsPage::OfaSwAutoFmtOptionsPage(weld::Container* pPage, weld::D
    , sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES))
    , sCorrectCapsLock(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK))
    , sDetectURL(CuiResId(RID_CUISTR_DETECT_URL))
    , sDetectDOI(CuiResId(RID_CUISTR_DETECT_DOI))
    , sDash(CuiResId(RID_CUISTR_DASH))
    , sRightMargin(CuiResId(RID_CUISTR_RIGHT_MARGIN))
    , sNum(CuiResId(RID_CUISTR_NUM))
@@ -455,6 +461,12 @@ bool OfaSwAutoFmtOptionsPage::FillItemSet( SfxItemSet*  )
    pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr,
                        m_xCheckLB->get_toggle(DETECT_URL, CBCOL_SECOND) == TRISTATE_TRUE);

    bCheck = m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_FIRST) == TRISTATE_TRUE;
    bModified |= pOpt->bSetDOIAttr != bCheck;
    pOpt->bSetDOIAttr = bCheck;
    pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr,
                        m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_SECOND) == TRISTATE_TRUE);

    bCheck = m_xCheckLB->get_toggle(DEL_EMPTY_NODE, CBCOL_FIRST) == TRISTATE_TRUE;
    bModified |= pOpt->bDelEmptyNode != bCheck;
    pOpt->bDelEmptyNode = bCheck;
@@ -558,6 +570,7 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* )
    CreateEntry(sCapitalStartSentence, CBCOL_BOTH  );
    CreateEntry(sBoldUnder,         CBCOL_BOTH  );
    CreateEntry(sDetectURL,         CBCOL_BOTH  );
    CreateEntry(sDetectDOI,         CBCOL_BOTH  );
    CreateEntry(sDash,              CBCOL_BOTH  );
    CreateEntry(sDelSpaceAtSttEnd,  CBCOL_BOTH  );
    CreateEntry(sDelSpaceBetweenLines, CBCOL_BOTH  );
@@ -583,6 +596,8 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* )
    m_xCheckLB->set_toggle(BOLD_UNDERLINE, bool(nFlags & ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
    m_xCheckLB->set_toggle(DETECT_URL, pOpt->bSetINetAttr ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
    m_xCheckLB->set_toggle(DETECT_URL, bool(nFlags & ACFlags::SetINetAttr) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
    m_xCheckLB->set_toggle(DETECT_DOI, pOpt->bSetDOIAttr ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
    m_xCheckLB->set_toggle(DETECT_DOI, bool(nFlags & ACFlags::SetDOIAttr) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
    m_xCheckLB->set_toggle(REPLACE_DASHES, pOpt->bChgToEnEmDash ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
    m_xCheckLB->set_toggle(REPLACE_DASHES, bool(nFlags & ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
    m_xCheckLB->set_toggle(DEL_SPACES_AT_STT_END, pOpt->bAFormatDelSpacesAtSttEnd ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
diff --git a/editeng/source/misc/acorrcfg.cxx b/editeng/source/misc/acorrcfg.cxx
index 787663a..8603b43 100644
--- a/editeng/source/misc/acorrcfg.cxx
+++ b/editeng/source/misc/acorrcfg.cxx
@@ -185,9 +185,10 @@ Sequence<OUString>  SvxBaseAutoCorrCfg::GetPropertyNames()
        "DoubleQuoteAtEnd",                     // 16
        "CorrectAccidentalCapsLock",            // 17
        "TransliterateRTL",                     // 18
        "ChangeAngleQuotes"                     // 19
        "ChangeAngleQuotes",                    // 19
        "SetDOIAttribute",                      // 20
    };
    const int nCount = 20;
    const int nCount = 21;
    Sequence<OUString> aNames(nCount);
    OUString* pNames = aNames.getArray();
    for(int i = 0; i < nCount; i++)
@@ -298,6 +299,10 @@ void SvxBaseAutoCorrCfg::Load(bool bInit)
                    if(*o3tl::doAccess<bool>(pValues[nProp]))
                        nFlags |= ACFlags::ChgAngleQuotes;
                break;//"ChangeAngleQuotes"
                case  20:
                    if(*o3tl::doAccess<bool>(pValues[nProp]))
                        nFlags |= ACFlags::SetDOIAttr;
                break;//"SetDOIAttr",
            }
        }
    }
@@ -333,6 +338,7 @@ void SvxBaseAutoCorrCfg::ImplCommit()
         css::uno::Any(bool(nFlags & ACFlags::ChgWeightUnderl)),
            // "ChangeUnderlineWeight"
         css::uno::Any(bool(nFlags & ACFlags::SetINetAttr)), // "SetInetAttribute"
         css::uno::Any(bool(nFlags & ACFlags::SetDOIAttr)), // "SetDOIAttr"
         css::uno::Any(bool(nFlags & ACFlags::ChgOrdinalNumber)),
            // "ChangeOrdinalNumber"
         css::uno::Any(bool(nFlags & ACFlags::AddNonBrkSpace)), // "AddNonBreakingSpace"
@@ -414,8 +420,9 @@ Sequence<OUString>  SvxSwAutoCorrCfg::GetPropertyNames()
        "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily",    //44
        "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset",   //45
        "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch",     //46
        "Format/Option/SetDOIAttribute",                                //47
    };
    const int nCount = 47;
    const int nCount = 48;
    Sequence<OUString> aNames(nCount);
    OUString* pNames = aNames.getArray();
    for(int i = 0; i < nCount; i++)
@@ -565,6 +572,7 @@ void SvxSwAutoCorrCfg::Load(bool bInit)
                    rSwFlags.aByInputBulletFont.SetPitch(FontPitch(nVal));
                }
                break;// "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch",
                case   47: rSwFlags.bSetDOIAttr = *o3tl::doAccess<bool>(pValues[nProp]); break; // "Format/Option/SetDOIAttribute",
            }
        }
    }
@@ -666,8 +674,10 @@ void SvxSwAutoCorrCfg::ImplCommit()
            // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily"
         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetCharSet())),
            // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset"
         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch()))});
         css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch())),
            // "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch"
         css::uno::Any(rSwFlags.bSetDOIAttr)});
            // "Format/Option/SetDOIAttribute"
}

void SvxSwAutoCorrCfg::Notify( const Sequence<OUString>& /* aPropertyNames */ )
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index 5e229ba..6874303 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -289,6 +289,7 @@ ACFlags SvxAutoCorrect::GetDefaultFlags()
                    | ACFlags::ChgAngleQuotes
                    | ACFlags::ChgWeightUnderl
                    | ACFlags::SetINetAttr
                    | ACFlags::SetDOIAttr
                    | ACFlags::ChgQuotes
                    | ACFlags::SaveWordCplSttLst
                    | ACFlags::SaveWordWordStartLst
@@ -752,6 +753,18 @@ bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
    return bRet;
}

// DOI citation recognition: asks URIHelper::FindFirstDOIInText for a DOI in
// the given range of rTxt and, when one is reported, sets it as hyperlink
// attribute on the document. Returns true only if an attribute was set.
bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    // nSttPos/nEndPos are handed over by reference, so the positions used
    // below are the ones left behind by the lookup.
    const OUString sDoiLink(
        URIHelper::FindFirstDOIInText( rTxt, nSttPos, nEndPos, GetCharClass( eLang ) ));
    if( sDoiLink.isEmpty() )
        return false;           // nothing recognised, leave the text untouched

    rDoc.SetINetAttr( nSttPos, nEndPos, sDoiLink );
    return true;
}

// Automatic *bold*, /italic/, -strikeout- and _underline_
bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                        sal_Int32 nEndPos )
@@ -1609,7 +1622,10 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
                FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
            ( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
                ( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
                FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
            ;
        else
        {
diff --git a/editeng/source/misc/swafopt.cxx b/editeng/source/misc/swafopt.cxx
index 2935545..f6de409 100644
--- a/editeng/source/misc/swafopt.cxx
+++ b/editeng/source/misc/swafopt.cxx
@@ -36,6 +36,7 @@ SvxSwAutoFormatFlags::SvxSwAutoFormatFlags()
    bChgToEnEmDash =
    bChgWeightUnderl =
    bSetINetAttr =
    bSetDOIAttr =
    bAFormatDelSpacesAtSttEnd =
    bAFormatDelSpacesBetweenLines =
    bAFormatByInpDelSpacesAtSttEnd =
diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx
index 39a3d4c..eddf8c9 100644
--- a/include/editeng/svxacorr.hxx
+++ b/include/editeng/svxacorr.hxx
@@ -76,13 +76,14 @@ enum class ACFlags : sal_uInt32 {
    CorrectCapsLock      = 0x00002000,   // Correct accidental use of cAPS LOCK key
    TransliterateRTL     = 0x00004000,   // Transliterate RTL text
    ChgAngleQuotes       = 0x00008000,   // >>, << -> angle quotes in some languages
    SetDOIAttr           = 0x00010000,   // Set DOIAttribut

    ChgWordLstLoad       = 0x20000000,   // Replacement list loaded
    CplSttLstLoad        = 0x40000000,   // Exception list for Capital letters Start loaded
    WordStartLstLoad        = 0x80000000,   // Exception list for Word Start loaded
};
namespace o3tl {
    template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 0xe000ffff> {};
    template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 0xe001ffff> {};
}

enum class ACQuotes
@@ -416,6 +417,9 @@ public:
    bool FnSetINetAttr( SvxAutoCorrDoc&, const OUString&,
                                sal_Int32 nSttPos, sal_Int32 nEndPos,
                                LanguageType eLang );
    bool FnSetDOIAttr( SvxAutoCorrDoc&, const OUString&,
                                sal_Int32 nSttPos, sal_Int32 nEndPos,
                                LanguageType eLang );
    bool FnChgWeightUnderl( SvxAutoCorrDoc&, const OUString&,
                                sal_Int32 nEndPos );
    void FnCapitalStartSentence( SvxAutoCorrDoc&, const OUString&, bool bNormalPos,
diff --git a/include/editeng/swafopt.hxx b/include/editeng/swafopt.hxx
index 180ba6d..7191938 100644
--- a/include/editeng/swafopt.hxx
+++ b/include/editeng/swafopt.hxx
@@ -109,6 +109,7 @@ struct EDITENG_DLLPUBLIC SvxSwAutoFormatFlags
    bool bChgAngleQuotes : 1;
    bool bChgWeightUnderl : 1;
    bool bSetINetAttr : 1;
    bool bSetDOIAttr : 1;

    bool bSetBorder : 1;
    bool bCreateTable : 1;
diff --git a/include/svl/urihelper.hxx b/include/svl/urihelper.hxx
index 9f8588c..68843c1 100644
--- a/include/svl/urihelper.hxx
+++ b/include/svl/urihelper.hxx
@@ -120,6 +120,11 @@ SVL_DLLPUBLIC OUString FindFirstURLInText(OUString const & rText,
                                          INetURLObject::EncodeMechanism eMechanism = INetURLObject::EncodeMechanism::WasEncoded,
                                          rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);

SVL_DLLPUBLIC OUString FindFirstDOIInText(OUString const & rText,
                                          sal_Int32 & rBegin,
                                          sal_Int32 & rEnd,
                                          CharClass const & rCharClass);

/** Remove any password component from both absolute and relative URLs.

    @ATT  The current implementation will not remove a password from a
diff --git a/officecfg/registry/schema/org/openoffice/Office/Common.xcs b/officecfg/registry/schema/org/openoffice/Office/Common.xcs
index 9beda4a..3bba443 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Common.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Common.xcs
@@ -1335,6 +1335,16 @@
        </info>
        <value>true</value>
      </prop>
      <prop oor:name="SetDOIAttribute" oor:type="xs:boolean" oor:nillable="false">
        <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - DOI
             recognition -->
        <info>
          <desc>Specifies if character strings which could represent a DOI
          should be converted to a hyperlink.</desc>
          <label>Detect DOI</label>
        </info>
        <value>true</value>
      </prop>
      <prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" oor:nillable="false">
        <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - Replace
             1st... -->
diff --git a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
index bc84344..7f0f55d 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
@@ -4173,6 +4173,14 @@
            </info>
            <value>true</value>
          </prop>
          <prop oor:name="SetDOIAttribute" oor:type="xs:boolean" oor:nillable="false">
            <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - DOI citation recognition -->
            <info>
              <desc>Specifies whether character strings which could represent a DOI are converted to a hyperlink.</desc>
              <label>Detect DOI</label>
            </info>
            <value>true</value>
          </prop>
          <prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" oor:nillable="false">
            <!-- UIHints: Tools  AutoCorrect/AutoFormat  Options - Replace 1st... -->
            <info>
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index eb5135c..df9e5d5 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -181,11 +181,14 @@ public:

    void testFindFirstURLInText();

    void testFindFirstDOIInText();

    void testResolveIdnaHost();

    CPPUNIT_TEST_SUITE(Test);
    CPPUNIT_TEST(testNormalizedMakeRelative);
    CPPUNIT_TEST(testFindFirstURLInText);
    CPPUNIT_TEST(testFindFirstDOIInText);
    CPPUNIT_TEST(testResolveIdnaHost);
    CPPUNIT_TEST(finish);
    CPPUNIT_TEST_SUITE_END();
@@ -398,6 +401,57 @@ void Test::testFindFirstURLInText() {
    }
}

void Test::testFindFirstDOIInText() {
    struct Data {
        char const * input;
        char const * result;
        sal_Int32 begin;
        sal_Int32 end;
    };
    static Data const tests[] = {
        { "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits
        { "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters
        { "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash
        { "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric
        { "doi:10.109/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi;10. is not between 4 and 9
        { "doi:10.1234567890/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi;10. is not between 4 and 9
        { "doi:10.1093/ajae/aaq063/", nullptr, 0, 0 }, // nothing after slash
        { "doi:10.1093", nullptr, 0, 0 }, // no slash
        { "doi:11.1093/ajae/aaq063", nullptr, 0, 0 }, // doesn't begin with doi:10.
    };
    CharClass charClass( m_context, LanguageTag( css::lang::Locale("en", "US", "")));
    for (std::size_t i = 0; i < SAL_N_ELEMENTS(tests); ++i) {
        OUString input(OUString::createFromAscii(tests[i].input));
        sal_Int32 begin = 0;
        sal_Int32 end = input.getLength();
        OUString result(
            URIHelper::FindFirstDOIInText(input, begin, end, charClass));
        bool ok = tests[i].result == nullptr
            ? (result.getLength() == 0 && begin == input.getLength()
               && end == input.getLength())
            : (result.equalsAscii(tests[i].result) && begin == tests[i].begin
               && end == tests[i].end);
        OString msg;
        if (!ok) {
            OStringBuffer buf;
            buf.append(OString::Concat("\"")
                + tests[i].input
                + "\" -> ");
            buf.append(tests[i].result == nullptr ? "none" : tests[i].result);
            buf.append(" ("
                + OString::number(tests[i].begin)
                + ", "
                + OString::number(tests[i].end)
                + ")"
                " != "
                + OUStringToOString(result, RTL_TEXTENCODING_UTF8)
                + " (" + OString::number(begin) + ", " + OString::number(end) +")");
            msg = buf.makeStringAndClear();
        }
        CPPUNIT_ASSERT_MESSAGE(msg.getStr(), ok);
    }
}

void Test::testResolveIdnaHost() {
    OUString input;

diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 6f121fb..0043b78 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -745,6 +745,65 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
    return OUString();
}

/** Recognise a DOI citation that spans exactly [rBegin, rEnd) of rText.

    The range must have the form "doi:10." + 4 to 9 digits + "/" + a
    non-empty suffix that does not end in a slash.

    @param rText       text containing the candidate
    @param rBegin      in: start of the candidate; set to rEnd when the range
                       is valid but holds no DOI, otherwise left unchanged
    @param rEnd        in: end (exclusive) of the candidate; never modified
    @param rCharClass  used to classify digits and alphanumeric characters

    @return the candidate with its "doi:" prefix replaced by
            "https://doi.org/", or an empty string if the range is invalid or
            is not a DOI
*/
OUString URIHelper::FindFirstDOIInText(OUString const & rText,
                                       sal_Int32 & rBegin,
                                       sal_Int32 & rEnd,
                                       CharClass const & rCharClass)
{
    // Reject ranges that do not lie inside rText. The rBegin < 0 check keeps
    // subView() below from being called with a negative index.
    if (rBegin < 0 || rBegin > rEnd || rEnd > rText.getLength())
        return OUString();

    const sal_Int32 nLength = rEnd - rBegin;
    const OUString aCandidate(rText.subView(rBegin, nLength));
    // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
    if (aCandidate.startsWith("doi:10."))
    {
        constexpr sal_Int32 nPrefixLength = 7; // length of "doi:10."
        sal_Int32 nDigits = 0;      // digits seen between "doi:10." and the first slash
        bool bInSuffix = false;     // true once that first slash has been consumed
        bool bValid = true;
        for (sal_Int32 i = nPrefixLength; bValid && i < nLength; ++i)
        {
            const sal_Unicode c = aCandidate[i];
            const bool bLastChar = (i == nLength - 1);
            if (!bInSuffix)
            {
                // Match 4 to 9 digits before slash
                if (nDigits > 9)
                    bValid = false;
                else if (rCharClass.isDigit(aCandidate, i))
                    ++nDigits;
                else if (c == '/' && nDigits >= 4 && !bLastChar)
                    bInSuffix = true;   // a slash must be followed by something
                else
                    bValid = false;
            }
            else
            {
                // Match [-._;()\/:a-zA-Z0-9] after slash. Note that a
                // backslash is accepted here as well, although "\/" in the
                // pattern above is merely an escaped slash; a slash itself
                // is not allowed as the very last character.
                bValid = rCharClass.isAlphaNumeric(aCandidate, i)
                         || c == '.' || c == '-' || c == '_' || c == ';'
                         || c == '(' || c == ')' || c == '\\'
                         || (c == '/' && !bLastChar) || c == ':';
            }
        }
        // Without a suffix part (no slash seen) this is not a DOI.
        if (bValid && bInSuffix)
        {
            return aCandidate.replaceFirst("doi:","https://doi.org/");
        }
    }
    // Valid range but no DOI: signal "nothing found" by collapsing the range.
    rBegin = rEnd;
    return OUString();
}

OUString URIHelper::removePassword(OUString const & rURI,
                                   INetURLObject::EncodeMechanism eEncodeMechanism,
                                   INetURLObject::DecodeMechanism eDecodeMechanism,
diff --git a/sw/inc/comcore.hxx b/sw/inc/comcore.hxx
index 2fda83a..3e43536 100644
--- a/sw/inc/comcore.hxx
+++ b/sw/inc/comcore.hxx
@@ -43,8 +43,9 @@
#define STR_AUTOFMTREDL_DEL_MORELINES           20
#define STR_AUTOFMTREDL_NON_BREAK_SPACE         21
#define STR_AUTOFMTREDL_TRANSLITERATE_RTL       22
#define STR_AUTOFMTREDL_DETECT_DOI              23
// !!!!!!!!!!!!!!!!!!!!!!!!!!  always set the correct end !!!!!!!!!!!!
#define STR_AUTOFMTREDL_END                     23
#define STR_AUTOFMTREDL_END                     24

#endif

diff --git a/sw/inc/utlui.hrc b/sw/inc/utlui.hrc
index 2d71c11..d5998e1 100644
--- a/sw/inc/utlui.hrc
+++ b/sw/inc/utlui.hrc
@@ -48,7 +48,8 @@ const TranslateId RID_SHELLRES_AUTOFMTSTRS[] =
    NC_("RID_SHELLRES_AUTOFMTSTRS", "Set \"Bullet\" or \"Numbering\" Style"),
    NC_("RID_SHELLRES_AUTOFMTSTRS", "Combine paragraphs"),
    NC_("RID_SHELLRES_AUTOFMTSTRS", "Add non breaking space"),
    NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old Hungarian script")
    NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old Hungarian script"),
    NC_("RID_SHELLRES_AUTOFMTSTRS", "DOI citation recognition")
};

#endif
diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx
index 0f5d4cd..b63f19b 100644
--- a/sw/source/core/edit/autofmt.cxx
+++ b/sw/source/core/edit/autofmt.cxx
@@ -2189,7 +2189,11 @@ void SwAutoFormat::AutoCorrect(TextFrameIndex nPos)
                ( m_aFlags.bSetINetAttr &&
                    (nPos == TextFrameIndex(pText->getLength()) || IsSpace((*pText)[sal_Int32(nPos)])) &&
                    SetRedlineText( STR_AUTOFMTREDL_DETECT_URL ) &&
                    pATst->FnSetINetAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)))
                    pATst->FnSetINetAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)) ||
                ( m_aFlags.bSetDOIAttr &&
                    (nPos == TextFrameIndex(pText->getLength()) || IsSpace((*pText)[sal_Int32(nPos)])) &&
                    SetRedlineText( STR_AUTOFMTREDL_DETECT_DOI ) &&
                    pATst->FnSetDOIAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)))
            {
                nPos = m_pCurTextFrame->MapModelToViewPos(*m_aDelPam.GetPoint());
            }
@@ -2779,7 +2783,8 @@ void SwEditShell::AutoFormatBySplitNode()
        SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect();
        if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
                                ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
                                ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ))
                                ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect |
                                ACFlags::SetDOIAttr ))
            pACorr = nullptr;

        if( pACorr )
diff --git a/sw/source/uibase/docvw/edtwin.cxx b/sw/source/uibase/docvw/edtwin.cxx
index a15dddf..5bcbd4a 100644
--- a/sw/source/uibase/docvw/edtwin.cxx
+++ b/sw/source/uibase/docvw/edtwin.cxx
@@ -2570,7 +2570,8 @@ KEYINPUT_CHECKTABLE_INSDEL:
                    pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
                                            ACFlags::ChgOrdinalNumber | ACFlags::AddNonBrkSpace |
                                            ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr |
                                            ACFlags::Autocorrect | ACFlags::TransliterateRTL ) &&
                                            ACFlags::Autocorrect | ACFlags::TransliterateRTL |
                                            ACFlags::SetDOIAttr ) &&
                    '\"' != aCh && '\'' != aCh && '*' != aCh && '_' != aCh
                    )
                {
@@ -2608,7 +2609,7 @@ KEYINPUT_CHECKTABLE_INSDEL:
                pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
                                        ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
                                        ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr |
                                        ACFlags::Autocorrect ) &&
                                        ACFlags::Autocorrect | ACFlags::SetDOIAttr ) &&
                !rSh.HasReadonlySel() )
            {
                FlushInBuffer();
diff --git a/sw/source/uibase/shells/textsh.cxx b/sw/source/uibase/shells/textsh.cxx
index 8ecd185..e7af6d5 100644
--- a/sw/source/uibase/shells/textsh.cxx
+++ b/sw/source/uibase/shells/textsh.cxx
@@ -167,7 +167,8 @@ void SwTextShell::ExecInsert(SfxRequest &rReq)
                && pACorr->IsAutoCorrFlag(
                    ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
                    ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
                    ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ) )
                    ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect |
                    ACFlags::SetDOIAttr ) )
            {
                rSh.AutoCorrect( *pACorr, cIns );
            }
diff --git a/sw/source/uibase/wrtsh/wrtsh1.cxx b/sw/source/uibase/wrtsh/wrtsh1.cxx
index 65f434d..0cdd279 100644
--- a/sw/source/uibase/wrtsh/wrtsh1.cxx
+++ b/sw/source/uibase/wrtsh/wrtsh1.cxx
@@ -173,7 +173,8 @@ static SvxAutoCorrect* lcl_IsAutoCorr()
    SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect();
    if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
                            ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
                            ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ))
                            ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect |
                            ACFlags::SetDOIAttr ))
        pACorr = nullptr;
    return pACorr;
}