tdf#145925: Add DOI recognition
Detect DOI strings of the form "doi:10.*" and turn them into hyperlinks.
This works the same way as URL recognition.
Change-Id: I3c4e78a110fd81ad7e727d5e9acee7e51127466a
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/150954
Tested-by: Jenkins
Reviewed-by: Heiko Tietze <heiko.tietze@documentfoundation.org>
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
diff --git a/cui/inc/strings.hrc b/cui/inc/strings.hrc
index 498e60e..442ac51 100644
--- a/cui/inc/strings.hrc
+++ b/cui/inc/strings.hrc
@@ -332,6 +332,7 @@
#define RID_CUISTR_BOLD_UNDER NC_("RID_SVXSTR_BOLD_UNDER", "Automatic *bold*, /italic/, -strikeout- and _underline_")
#define RID_CUISTR_NO_DBL_SPACES NC_("RID_SVXSTR_NO_DBL_SPACES", "Ignore double spaces")
#define RID_CUISTR_DETECT_URL NC_("RID_SVXSTR_DETECT_URL", "URL Recognition")
#define RID_CUISTR_DETECT_DOI NC_("RID_SVXSTR_DETECT_DOI", "DOI citation recognition")
#define RID_CUISTR_DASH NC_("RID_SVXSTR_DASH", "Replace dashes")
#define RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK NC_("RID_SVXSTR_CORRECT_ACCIDENTAL_CAPS_LOCK", "Correct accidental use of cAPS LOCK key")
#define RID_CUISTR_NON_BREAK_SPACE NC_("RID_SVXSTR_NON_BREAK_SPACE", "Add non-breaking space before specific punctuation marks in French text")
diff --git a/cui/source/inc/autocdlg.hxx b/cui/source/inc/autocdlg.hxx
index 7b48423..2357677 100644
--- a/cui/source/inc/autocdlg.hxx
+++ b/cui/source/inc/autocdlg.hxx
@@ -57,6 +57,7 @@ private:
OUString m_sStartCap;
OUString m_sBoldUnderline;
OUString m_sURL;
OUString m_sDOI;
OUString m_sNoDblSpaces;
OUString m_sDash;
OUString m_sAccidentalCaps;
@@ -92,6 +93,7 @@ class OfaSwAutoFmtOptionsPage : public SfxTabPage
OUString sNoDblSpaces;
OUString sCorrectCapsLock;
OUString sDetectURL;
OUString sDetectDOI;
OUString sDash;
OUString sRightMargin;
OUString sNum;
diff --git a/cui/source/tabpages/autocdlg.cxx b/cui/source/tabpages/autocdlg.cxx
index aea3f7d..38a261a 100644
--- a/cui/source/tabpages/autocdlg.cxx
+++ b/cui/source/tabpages/autocdlg.cxx
@@ -189,6 +189,7 @@ OfaAutocorrOptionsPage::OfaAutocorrOptionsPage(weld::Container* pPage, weld::Dia
, m_sStartCap(CuiResId(RID_CUISTR_CPTL_STT_SENT))
, m_sBoldUnderline(CuiResId(RID_CUISTR_BOLD_UNDER))
, m_sURL(CuiResId(RID_CUISTR_DETECT_URL))
, m_sDOI(CuiResId(RID_CUISTR_DETECT_DOI))
, m_sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES))
, m_sDash(CuiResId(RID_CUISTR_DASH))
, m_sAccidentalCaps(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK))
@@ -223,6 +224,7 @@ bool OfaAutocorrOptionsPage::FillItemSet( SfxItemSet* )
pAutoCorrect->SetAutoCorrFlag(ACFlags::CapitalStartSentence, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgWeightUnderl, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
pAutoCorrect->SetAutoCorrFlag(ACFlags::ChgToEnEmDash, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
pAutoCorrect->SetAutoCorrFlag(ACFlags::IgnoreDoubleSpace, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
pAutoCorrect->SetAutoCorrFlag(ACFlags::CorrectCapsLock, m_xCheckLB->get_toggle(nPos++) == TRISTATE_TRUE);
@@ -263,6 +265,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* )
InsertEntry(m_sStartCap);
InsertEntry(m_sBoldUnderline);
InsertEntry(m_sURL);
InsertEntry(m_sDOI);
InsertEntry(m_sDash);
InsertEntry(m_sNoDblSpaces);
InsertEntry(m_sAccidentalCaps);
@@ -273,6 +276,7 @@ void OfaAutocorrOptionsPage::Reset( const SfxItemSet* )
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CapitalStartSentence) ? TRISTATE_TRUE : TRISTATE_FALSE );
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE );
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetINetAttr) ? TRISTATE_TRUE : TRISTATE_FALSE );
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::SetDOIAttr) ? TRISTATE_TRUE : TRISTATE_FALSE );
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE );
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::IgnoreDoubleSpace) ? TRISTATE_TRUE : TRISTATE_FALSE );
m_xCheckLB->set_toggle( nPos++, bool(nFlags & ACFlags::CorrectCapsLock) ? TRISTATE_TRUE : TRISTATE_FALSE );
@@ -333,6 +337,7 @@ enum OfaAutoFmtOptions
BEGIN_UPPER,
BOLD_UNDERLINE,
DETECT_URL,
DETECT_DOI,
REPLACE_DASHES,
DEL_SPACES_AT_STT_END,
DEL_SPACES_BETWEEN_LINES,
@@ -363,6 +368,7 @@ OfaSwAutoFmtOptionsPage::OfaSwAutoFmtOptionsPage(weld::Container* pPage, weld::D
, sNoDblSpaces(CuiResId(RID_CUISTR_NO_DBL_SPACES))
, sCorrectCapsLock(CuiResId(RID_CUISTR_CORRECT_ACCIDENTAL_CAPS_LOCK))
, sDetectURL(CuiResId(RID_CUISTR_DETECT_URL))
, sDetectDOI(CuiResId(RID_CUISTR_DETECT_DOI))
, sDash(CuiResId(RID_CUISTR_DASH))
, sRightMargin(CuiResId(RID_CUISTR_RIGHT_MARGIN))
, sNum(CuiResId(RID_CUISTR_NUM))
@@ -455,6 +461,12 @@ bool OfaSwAutoFmtOptionsPage::FillItemSet( SfxItemSet* )
pAutoCorrect->SetAutoCorrFlag(ACFlags::SetINetAttr,
m_xCheckLB->get_toggle(DETECT_URL, CBCOL_SECOND) == TRISTATE_TRUE);
bCheck = m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_FIRST) == TRISTATE_TRUE;
bModified |= pOpt->bSetDOIAttr != bCheck;
pOpt->bSetDOIAttr = bCheck;
pAutoCorrect->SetAutoCorrFlag(ACFlags::SetDOIAttr,
m_xCheckLB->get_toggle(DETECT_DOI, CBCOL_SECOND) == TRISTATE_TRUE);
bCheck = m_xCheckLB->get_toggle(DEL_EMPTY_NODE, CBCOL_FIRST) == TRISTATE_TRUE;
bModified |= pOpt->bDelEmptyNode != bCheck;
pOpt->bDelEmptyNode = bCheck;
@@ -558,6 +570,7 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* )
CreateEntry(sCapitalStartSentence, CBCOL_BOTH );
CreateEntry(sBoldUnder, CBCOL_BOTH );
CreateEntry(sDetectURL, CBCOL_BOTH );
CreateEntry(sDetectDOI, CBCOL_BOTH );
CreateEntry(sDash, CBCOL_BOTH );
CreateEntry(sDelSpaceAtSttEnd, CBCOL_BOTH );
CreateEntry(sDelSpaceBetweenLines, CBCOL_BOTH );
@@ -583,6 +596,8 @@ void OfaSwAutoFmtOptionsPage::Reset( const SfxItemSet* )
m_xCheckLB->set_toggle(BOLD_UNDERLINE, bool(nFlags & ACFlags::ChgWeightUnderl) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
m_xCheckLB->set_toggle(DETECT_URL, pOpt->bSetINetAttr ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
m_xCheckLB->set_toggle(DETECT_URL, bool(nFlags & ACFlags::SetINetAttr) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
m_xCheckLB->set_toggle(DETECT_DOI, pOpt->bSetDOIAttr ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
m_xCheckLB->set_toggle(DETECT_DOI, bool(nFlags & ACFlags::SetDOIAttr) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
m_xCheckLB->set_toggle(REPLACE_DASHES, pOpt->bChgToEnEmDash ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
m_xCheckLB->set_toggle(REPLACE_DASHES, bool(nFlags & ACFlags::ChgToEnEmDash) ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_SECOND);
m_xCheckLB->set_toggle(DEL_SPACES_AT_STT_END, pOpt->bAFormatDelSpacesAtSttEnd ? TRISTATE_TRUE : TRISTATE_FALSE, CBCOL_FIRST);
diff --git a/editeng/source/misc/acorrcfg.cxx b/editeng/source/misc/acorrcfg.cxx
index 787663a..8603b43 100644
--- a/editeng/source/misc/acorrcfg.cxx
+++ b/editeng/source/misc/acorrcfg.cxx
@@ -185,9 +185,10 @@ Sequence<OUString> SvxBaseAutoCorrCfg::GetPropertyNames()
"DoubleQuoteAtEnd", // 16
"CorrectAccidentalCapsLock", // 17
"TransliterateRTL", // 18
"ChangeAngleQuotes" // 19
"ChangeAngleQuotes", // 19
"SetDOIAttribute", // 20
};
const int nCount = 20;
const int nCount = 21;
Sequence<OUString> aNames(nCount);
OUString* pNames = aNames.getArray();
for(int i = 0; i < nCount; i++)
@@ -298,6 +299,10 @@ void SvxBaseAutoCorrCfg::Load(bool bInit)
if(*o3tl::doAccess<bool>(pValues[nProp]))
nFlags |= ACFlags::ChgAngleQuotes;
break;//"ChangeAngleQuotes"
case 20:
if(*o3tl::doAccess<bool>(pValues[nProp]))
nFlags |= ACFlags::SetDOIAttr;
break;//"SetDOIAttr",
}
}
}
@@ -333,6 +338,7 @@ void SvxBaseAutoCorrCfg::ImplCommit()
css::uno::Any(bool(nFlags & ACFlags::ChgWeightUnderl)),
// "ChangeUnderlineWeight"
css::uno::Any(bool(nFlags & ACFlags::SetINetAttr)), // "SetInetAttribute"
css::uno::Any(bool(nFlags & ACFlags::SetDOIAttr)), // "SetDOIAttr"
css::uno::Any(bool(nFlags & ACFlags::ChgOrdinalNumber)),
// "ChangeOrdinalNumber"
css::uno::Any(bool(nFlags & ACFlags::AddNonBrkSpace)), // "AddNonBreakingSpace"
@@ -414,8 +420,9 @@ Sequence<OUString> SvxSwAutoCorrCfg::GetPropertyNames()
"Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily", //44
"Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset", //45
"Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch", //46
"Format/Option/SetDOIAttribute", //47
};
const int nCount = 47;
const int nCount = 48;
Sequence<OUString> aNames(nCount);
OUString* pNames = aNames.getArray();
for(int i = 0; i < nCount; i++)
@@ -565,6 +572,7 @@ void SvxSwAutoCorrCfg::Load(bool bInit)
rSwFlags.aByInputBulletFont.SetPitch(FontPitch(nVal));
}
break;// "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch",
case 47: rSwFlags.bSetDOIAttr = *o3tl::doAccess<bool>(pValues[nProp]); break; // "Format/Option/SetDOIAttribute",
}
}
}
@@ -666,8 +674,10 @@ void SvxSwAutoCorrCfg::ImplCommit()
// "Format/ByInput/ApplyNumbering/SpecialCharacter/FontFamily"
css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetCharSet())),
// "Format/ByInput/ApplyNumbering/SpecialCharacter/FontCharset"
css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch()))});
css::uno::Any(sal_Int32(rSwFlags.aByInputBulletFont.GetPitch())),
// "Format/ByInput/ApplyNumbering/SpecialCharacter/FontPitch"
css::uno::Any(rSwFlags.bSetDOIAttr)});
// "Format/Option/SetDOIAttribute"
}
void SvxSwAutoCorrCfg::Notify( const Sequence<OUString>& /* aPropertyNames */ )
diff --git a/editeng/source/misc/svxacorr.cxx b/editeng/source/misc/svxacorr.cxx
index 5e229ba..6874303 100644
--- a/editeng/source/misc/svxacorr.cxx
+++ b/editeng/source/misc/svxacorr.cxx
@@ -289,6 +289,7 @@ ACFlags SvxAutoCorrect::GetDefaultFlags()
| ACFlags::ChgAngleQuotes
| ACFlags::ChgWeightUnderl
| ACFlags::SetINetAttr
| ACFlags::SetDOIAttr
| ACFlags::ChgQuotes
| ACFlags::SaveWordCplSttLst
| ACFlags::SaveWordWordStartLst
@@ -752,6 +753,18 @@ bool SvxAutoCorrect::FnSetINetAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
return bRet;
}
// DOI citation recognition:
// asks URIHelper::FindFirstDOIInText whether the text between nSttPos and
// nEndPos is a DOI and, if it returns a link, sets that link on the range via
// SvxAutoCorrDoc::SetINetAttr. Returns true when a link was returned (the
// result of SetINetAttr itself is not inspected).
bool SvxAutoCorrect::FnSetDOIAttr( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
                                    sal_Int32 nSttPos, sal_Int32 nEndPos,
                                    LanguageType eLang )
{
    const OUString aLink = URIHelper::FindFirstDOIInText(
        rTxt, nSttPos, nEndPos, GetCharClass( eLang ) );

    // nothing recognized: leave the document untouched
    if( aLink.isEmpty() )
        return false;

    rDoc.SetINetAttr( nSttPos, nEndPos, aLink );
    return true;
}
// Automatic *bold*, /italic/, -strikeout- and _underline_
bool SvxAutoCorrect::FnChgWeightUnderl( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
sal_Int32 nEndPos )
@@ -1609,7 +1622,10 @@ void SvxAutoCorrect::DoAutoCorrect( SvxAutoCorrDoc& rDoc, const OUString& rTxt,
FnChgOrdinalNumber( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
( IsAutoCorrFlag( ACFlags::SetINetAttr ) &&
( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
FnSetINetAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) ||
( IsAutoCorrFlag( ACFlags::SetDOIAttr ) &&
( ' ' == cChar || '\t' == cChar || 0x0a == cChar || !cChar ) &&
FnSetDOIAttr( rDoc, rTxt, nCapLttrPos, nInsPos, eLang ) ) )
;
else
{
diff --git a/editeng/source/misc/swafopt.cxx b/editeng/source/misc/swafopt.cxx
index 2935545..f6de409 100644
--- a/editeng/source/misc/swafopt.cxx
+++ b/editeng/source/misc/swafopt.cxx
@@ -36,6 +36,7 @@ SvxSwAutoFormatFlags::SvxSwAutoFormatFlags()
bChgToEnEmDash =
bChgWeightUnderl =
bSetINetAttr =
bSetDOIAttr =
bAFormatDelSpacesAtSttEnd =
bAFormatDelSpacesBetweenLines =
bAFormatByInpDelSpacesAtSttEnd =
diff --git a/include/editeng/svxacorr.hxx b/include/editeng/svxacorr.hxx
index 39a3d4c..eddf8c9 100644
--- a/include/editeng/svxacorr.hxx
+++ b/include/editeng/svxacorr.hxx
@@ -76,13 +76,14 @@ enum class ACFlags : sal_uInt32 {
CorrectCapsLock = 0x00002000, // Correct accidental use of cAPS LOCK key
TransliterateRTL = 0x00004000, // Transliterate RTL text
ChgAngleQuotes = 0x00008000, // >>, << -> angle quotes in some languages
SetDOIAttr = 0x00010000, // Set DOIAttribut
ChgWordLstLoad = 0x20000000, // Replacement list loaded
CplSttLstLoad = 0x40000000, // Exception list for Capital letters Start loaded
WordStartLstLoad = 0x80000000, // Exception list for Word Start loaded
};
namespace o3tl {
template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 0xe000ffff> {};
template<> struct typed_flags<ACFlags> : is_typed_flags<ACFlags, 0xe001ffff> {};
}
enum class ACQuotes
@@ -416,6 +417,9 @@ public:
bool FnSetINetAttr( SvxAutoCorrDoc&, const OUString&,
sal_Int32 nSttPos, sal_Int32 nEndPos,
LanguageType eLang );
bool FnSetDOIAttr( SvxAutoCorrDoc&, const OUString&,
sal_Int32 nSttPos, sal_Int32 nEndPos,
LanguageType eLang );
bool FnChgWeightUnderl( SvxAutoCorrDoc&, const OUString&,
sal_Int32 nEndPos );
void FnCapitalStartSentence( SvxAutoCorrDoc&, const OUString&, bool bNormalPos,
diff --git a/include/editeng/swafopt.hxx b/include/editeng/swafopt.hxx
index 180ba6d..7191938 100644
--- a/include/editeng/swafopt.hxx
+++ b/include/editeng/swafopt.hxx
@@ -109,6 +109,7 @@ struct EDITENG_DLLPUBLIC SvxSwAutoFormatFlags
bool bChgAngleQuotes : 1;
bool bChgWeightUnderl : 1;
bool bSetINetAttr : 1;
bool bSetDOIAttr : 1;
bool bSetBorder : 1;
bool bCreateTable : 1;
diff --git a/include/svl/urihelper.hxx b/include/svl/urihelper.hxx
index 9f8588c..68843c1 100644
--- a/include/svl/urihelper.hxx
+++ b/include/svl/urihelper.hxx
@@ -120,6 +120,11 @@ SVL_DLLPUBLIC OUString FindFirstURLInText(OUString const & rText,
INetURLObject::EncodeMechanism eMechanism = INetURLObject::EncodeMechanism::WasEncoded,
rtl_TextEncoding eCharset = RTL_TEXTENCODING_UTF8);
SVL_DLLPUBLIC OUString FindFirstDOIInText(OUString const & rText,
sal_Int32 & rBegin,
sal_Int32 & rEnd,
CharClass const & rCharClass);
/** Remove any password component from both absolute and relative URLs.
@ATT The current implementation will not remove a password from a
diff --git a/officecfg/registry/schema/org/openoffice/Office/Common.xcs b/officecfg/registry/schema/org/openoffice/Office/Common.xcs
index 9beda4a..3bba443 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Common.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Common.xcs
@@ -1335,6 +1335,16 @@
</info>
<value>true</value>
</prop>
<prop oor:name="SetDOIAttribute" oor:type="xs:boolean" oor:nillable="false">
<!-- UIHints: Tools AutoCorrect/AutoFormat Options - DOI citation recognition -->
<info>
<desc>Specifies if character strings which could represent a DOI
should be converted to a hyperlink.</desc>
<label>Detect DOI</label>
</info>
<value>true</value>
</prop>
<prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" oor:nillable="false">
<!-- UIHints: Tools AutoCorrect/AutoFormat Options - Replace
1st... -->
diff --git a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
index bc84344..7f0f55d 100644
--- a/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
+++ b/officecfg/registry/schema/org/openoffice/Office/Writer.xcs
@@ -4173,6 +4173,14 @@
</info>
<value>true</value>
</prop>
<prop oor:name="SetDOIAttribute" oor:type="xs:boolean" oor:nillable="false">
<!-- UIHints: Tools AutoCorrect/AutoFormat Options - DOI citation recognition -->
<info>
<desc>Specifies whether character strings which could represent a DOI are converted to a hyperlink.</desc>
<label>Detect DOI</label>
</info>
<value>true</value>
</prop>
<prop oor:name="ChangeOrdinalNumber" oor:type="xs:boolean" oor:nillable="false">
<!-- UIHints: Tools AutoCorrect/AutoFormat Options - Replace 1st... -->
<info>
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index eb5135c..df9e5d5 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -181,11 +181,14 @@ public:
void testFindFirstURLInText();
void testFindFirstDOIInText();
void testResolveIdnaHost();
CPPUNIT_TEST_SUITE(Test);
CPPUNIT_TEST(testNormalizedMakeRelative);
CPPUNIT_TEST(testFindFirstURLInText);
CPPUNIT_TEST(testFindFirstDOIInText);
CPPUNIT_TEST(testResolveIdnaHost);
CPPUNIT_TEST(finish);
CPPUNIT_TEST_SUITE_END();
@@ -398,6 +401,57 @@ void Test::testFindFirstURLInText() {
}
}
void Test::testFindFirstDOIInText() {
struct Data {
char const * input;
char const * result;
sal_Int32 begin;
sal_Int32 end;
};
static Data const tests[] = {
{ "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits
{ "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters
{ "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash
{ "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric
{ "doi:10.109/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi;10. is not between 4 and 9
{ "doi:10.1234567890/ajae/aaq063", nullptr, 0, 0 }, // # of digits after doi;10. is not between 4 and 9
{ "doi:10.1093/ajae/aaq063/", nullptr, 0, 0 }, // nothing after slash
{ "doi:10.1093", nullptr, 0, 0 }, // no slash
{ "doi:11.1093/ajae/aaq063", nullptr, 0, 0 }, // doesn't begin with doi:10.
};
CharClass charClass( m_context, LanguageTag( css::lang::Locale("en", "US", "")));
for (std::size_t i = 0; i < SAL_N_ELEMENTS(tests); ++i) {
OUString input(OUString::createFromAscii(tests[i].input));
sal_Int32 begin = 0;
sal_Int32 end = input.getLength();
OUString result(
URIHelper::FindFirstDOIInText(input, begin, end, charClass));
bool ok = tests[i].result == nullptr
? (result.getLength() == 0 && begin == input.getLength()
&& end == input.getLength())
: (result.equalsAscii(tests[i].result) && begin == tests[i].begin
&& end == tests[i].end);
OString msg;
if (!ok) {
OStringBuffer buf;
buf.append(OString::Concat("\"")
+ tests[i].input
+ "\" -> ");
buf.append(tests[i].result == nullptr ? "none" : tests[i].result);
buf.append(" ("
+ OString::number(tests[i].begin)
+ ", "
+ OString::number(tests[i].end)
+ ")"
" != "
+ OUStringToOString(result, RTL_TEXTENCODING_UTF8)
+ " (" + OString::number(begin) + ", " + OString::number(end) +")");
msg = buf.makeStringAndClear();
}
CPPUNIT_ASSERT_MESSAGE(msg.getStr(), ok);
}
}
void Test::testResolveIdnaHost() {
OUString input;
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 6f121fb..0043b78 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -745,6 +745,65 @@ OUString URIHelper::FindFirstURLInText(OUString const & rText,
return OUString();
}
/** Check whether the range [rBegin, rEnd) of rText is, as a whole, a DOI
    citation and convert it into a link.

    The range is accepted when it has the shape
    "doi:10.<4 to 9 digits>/<suffix>". The suffix must be non-empty, must not
    end in '/', and may only contain characters that rCharClass classifies as
    alphanumeric, or one of '.', '-', '_', ';', '(', ')', '/', ':' and '\'.
    (The backslash is accepted by this implementation although the reference
    regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+" does not list it.)

    @param rText       the text to examine
    @param rBegin      start index of the range; set to rEnd when the range is
                       well-formed but does not hold a DOI, unchanged otherwise
    @param rEnd        end index (exclusive) of the range; not modified
    @param rCharClass  used to classify digits and alphanumeric characters

    @return the range's text with the leading "doi:" replaced by
            "https://doi.org/", or an empty string when the range is malformed
            or does not hold a DOI
*/
OUString URIHelper::FindFirstDOIInText(OUString const & rText,
                                       sal_Int32 & rBegin,
                                       sal_Int32 & rEnd,
                                       CharClass const & rCharClass)
{
    // Reject malformed ranges up front; in particular a negative rBegin must
    // never reach subView() below.
    if (rBegin < 0 || rBegin > rEnd || rEnd > rText.getLength())
        return OUString();

    constexpr sal_Int32 nHeaderLen = 7; // length of "doi:10."
    const sal_Int32 nCount = rEnd - rBegin;
    const OUString aCandidate(rText.subView(rBegin, nCount));

    if (aCandidate.startsWith("doi:10."))
    {
        bool bValid = true;
        bool bInSuffix = false; // becomes true once the separating '/' was seen
        sal_Int32 nDigits = 0;  // digits of the registrant code seen so far

        for (sal_Int32 i = nHeaderLen; bValid && i < nCount; ++i)
        {
            const sal_Unicode c = aCandidate[i];
            const bool bLast = (i == nCount - 1);

            if (!bInSuffix)
            {
                // Registrant code: 4 to 9 digits, terminated by a '/' that
                // must be followed by at least one more character.
                if (nDigits > 9)
                    bValid = false;
                else if (rCharClass.isDigit(aCandidate, i))
                    ++nDigits;
                else if (c == '/' && nDigits >= 4 && !bLast)
                    bInSuffix = true;
                else
                    bValid = false;
            }
            else
            {
                // Suffix: restricted character set; a '/' is fine anywhere
                // but in the very last position.
                const bool bAllowed
                    = rCharClass.isAlphaNumeric(aCandidate, i)
                      || c == '.' || c == '-' || c == '_' || c == ';'
                      || c == '(' || c == ')' || c == '\\' || c == ':'
                      || (c == '/' && !bLast);
                if (!bAllowed)
                    bValid = false;
            }
        }

        if (bValid && bInSuffix)
        {
            return aCandidate.replaceFirst("doi:","https://doi.org/");
        }
    }

    // Not a DOI: report that the whole range has been consumed.
    rBegin = rEnd;
    return OUString();
}
OUString URIHelper::removePassword(OUString const & rURI,
INetURLObject::EncodeMechanism eEncodeMechanism,
INetURLObject::DecodeMechanism eDecodeMechanism,
diff --git a/sw/inc/comcore.hxx b/sw/inc/comcore.hxx
index 2fda83a..3e43536 100644
--- a/sw/inc/comcore.hxx
+++ b/sw/inc/comcore.hxx
@@ -43,8 +43,9 @@
#define STR_AUTOFMTREDL_DEL_MORELINES 20
#define STR_AUTOFMTREDL_NON_BREAK_SPACE 21
#define STR_AUTOFMTREDL_TRANSLITERATE_RTL 22
#define STR_AUTOFMTREDL_DETECT_DOI 23
// !!!!!!!!!!!!!!!!!!!!!!!!!! always set the correct end !!!!!!!!!!!!
#define STR_AUTOFMTREDL_END 23
#define STR_AUTOFMTREDL_END 24
#endif
diff --git a/sw/inc/utlui.hrc b/sw/inc/utlui.hrc
index 2d71c11..d5998e1 100644
--- a/sw/inc/utlui.hrc
+++ b/sw/inc/utlui.hrc
@@ -48,7 +48,8 @@ const TranslateId RID_SHELLRES_AUTOFMTSTRS[] =
NC_("RID_SHELLRES_AUTOFMTSTRS", "Set \"Bullet\" or \"Numbering\" Style"),
NC_("RID_SHELLRES_AUTOFMTSTRS", "Combine paragraphs"),
NC_("RID_SHELLRES_AUTOFMTSTRS", "Add non breaking space"),
NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old Hungarian script")
NC_("RID_SHELLRES_AUTOFMTSTRS", "Transliterates RTL Hungarian text to Old Hungarian script"),
NC_("RID_SHELLRES_AUTOFMTSTRS", "DOI citation recognition")
};
#endif
diff --git a/sw/source/core/edit/autofmt.cxx b/sw/source/core/edit/autofmt.cxx
index 0f5d4cd..b63f19b 100644
--- a/sw/source/core/edit/autofmt.cxx
+++ b/sw/source/core/edit/autofmt.cxx
@@ -2189,7 +2189,11 @@ void SwAutoFormat::AutoCorrect(TextFrameIndex nPos)
( m_aFlags.bSetINetAttr &&
(nPos == TextFrameIndex(pText->getLength()) || IsSpace((*pText)[sal_Int32(nPos)])) &&
SetRedlineText( STR_AUTOFMTREDL_DETECT_URL ) &&
pATst->FnSetINetAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)))
pATst->FnSetINetAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)) ||
( m_aFlags.bSetDOIAttr &&
(nPos == TextFrameIndex(pText->getLength()) || IsSpace((*pText)[sal_Int32(nPos)])) &&
SetRedlineText( STR_AUTOFMTREDL_DETECT_DOI ) &&
pATst->FnSetDOIAttr(aACorrDoc, *pText, sal_Int32(nLastBlank), sal_Int32(nPos), eLang)))
{
nPos = m_pCurTextFrame->MapModelToViewPos(*m_aDelPam.GetPoint());
}
@@ -2779,7 +2783,8 @@ void SwEditShell::AutoFormatBySplitNode()
SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect();
if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ))
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect |
ACFlags::SetDOIAttr ))
pACorr = nullptr;
if( pACorr )
diff --git a/sw/source/uibase/docvw/edtwin.cxx b/sw/source/uibase/docvw/edtwin.cxx
index a15dddf..5bcbd4a 100644
--- a/sw/source/uibase/docvw/edtwin.cxx
+++ b/sw/source/uibase/docvw/edtwin.cxx
@@ -2570,7 +2570,8 @@ KEYINPUT_CHECKTABLE_INSDEL:
pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
ACFlags::ChgOrdinalNumber | ACFlags::AddNonBrkSpace |
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr |
ACFlags::Autocorrect | ACFlags::TransliterateRTL ) &&
ACFlags::Autocorrect | ACFlags::TransliterateRTL |
ACFlags::SetDOIAttr ) &&
'\"' != aCh && '\'' != aCh && '*' != aCh && '_' != aCh
)
{
@@ -2608,7 +2609,7 @@ KEYINPUT_CHECKTABLE_INSDEL:
pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr |
ACFlags::Autocorrect ) &&
ACFlags::Autocorrect | ACFlags::SetDOIAttr ) &&
!rSh.HasReadonlySel() )
{
FlushInBuffer();
diff --git a/sw/source/uibase/shells/textsh.cxx b/sw/source/uibase/shells/textsh.cxx
index 8ecd185..e7af6d5 100644
--- a/sw/source/uibase/shells/textsh.cxx
+++ b/sw/source/uibase/shells/textsh.cxx
@@ -167,7 +167,8 @@ void SwTextShell::ExecInsert(SfxRequest &rReq)
&& pACorr->IsAutoCorrFlag(
ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ) )
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect |
ACFlags::SetDOIAttr ) )
{
rSh.AutoCorrect( *pACorr, cIns );
}
diff --git a/sw/source/uibase/wrtsh/wrtsh1.cxx b/sw/source/uibase/wrtsh/wrtsh1.cxx
index 65f434d..0cdd279 100644
--- a/sw/source/uibase/wrtsh/wrtsh1.cxx
+++ b/sw/source/uibase/wrtsh/wrtsh1.cxx
@@ -173,7 +173,8 @@ static SvxAutoCorrect* lcl_IsAutoCorr()
SvxAutoCorrect* pACorr = SvxAutoCorrCfg::Get().GetAutoCorrect();
if( pACorr && !pACorr->IsAutoCorrFlag( ACFlags::CapitalStartSentence | ACFlags::CapitalStartWord |
ACFlags::AddNonBrkSpace | ACFlags::ChgOrdinalNumber | ACFlags::TransliterateRTL |
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect ))
ACFlags::ChgToEnEmDash | ACFlags::SetINetAttr | ACFlags::Autocorrect |
ACFlags::SetDOIAttr ))
pACorr = nullptr;
return pACorr;
}