tdf#145925: Support AutoCapitalize in DOI recognition

Sometimes the first character of a DOI string gets auto-capitalized, and the string is then
no longer recognized as a DOI. DOI detection now matches the "doi:" prefix case-insensitively,
so a DOI string with a capitalized first character is recognized, as is already done in URL recognition.

Change-Id: I95334941dc4cda3095f1750fea927640dea55e23
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/151142
Tested-by: Jenkins
Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
diff --git a/svl/qa/unit/test_URIHelper.cxx b/svl/qa/unit/test_URIHelper.cxx
index df9e5d5..33a0899 100644
--- a/svl/qa/unit/test_URIHelper.cxx
+++ b/svl/qa/unit/test_URIHelper.cxx
@@ -410,6 +410,11 @@ void Test::testFindFirstDOIInText() {
    };
    static Data const tests[] = {
        { "doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with only digits
        { "Doi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
        { "DoI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
        { "DOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with all of the first three characters being capitalized
        { "dOI:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
        { "dOi:10.1000/182", "https://doi.org/10.1000/182", 0, 15 }, // valid doi suffix with some of the first three characters being capitalized
        { "doi:10.1038/nature03001", "https://doi.org/10.1038/nature03001", 0, 23 }, // valid doi suffix with alphanumeric characters
        { "doi:10.1093/ajae/aaq063", "https://doi.org/10.1093/ajae/aaq063", 0, 23 }, // valid doi suffix with multiple slash
        { "doi:10.1016/S0735-1097(98)00347-7", "https://doi.org/10.1016/S0735-1097(98)00347-7", 0, 33 }, // valid doi suffix with characters apart from alphanumeric
diff --git a/svl/source/misc/urihelper.cxx b/svl/source/misc/urihelper.cxx
index 0043b78..9aa78a58 100644
--- a/svl/source/misc/urihelper.cxx
+++ b/svl/source/misc/urihelper.cxx
@@ -757,7 +757,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText,
    sal_Int32 count = rEnd-rBegin;
    OUString candidate(rText.subView(rBegin, count));
    // Match with regex "doi:10\.\d{4,9}\/[-._;()\/:a-zA-Z0-9]+"
    if (candidate.startsWith("doi:10."))
    if (candidate.startsWithIgnoreAsciiCase("doi:10."))
    {
        bool flag = true;
        sal_Int32 digit = 0;
@@ -797,7 +797,7 @@ OUString URIHelper::FindFirstDOIInText(OUString const & rText,
        }
        if (flag && digit==-1)
        {
            return candidate.replaceFirst("doi:","https://doi.org/");
            return OUString::Concat("https://doi.org/")+candidate.subView(4);
        }
    }
    rBegin = rEnd;