tdf#156548: make truncated long bookmark name unique, and use it in hyperlinks

Change-Id: I156359339ff8be85fe90cb6612eafdc6030c851f
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/155092
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/sw/qa/extras/ooxmlexport/data/longBookmarkName.fodt b/sw/qa/extras/ooxmlexport/data/longBookmarkName.fodt
new file mode 100644
index 0000000..8b09760
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/longBookmarkName.fodt
@@ -0,0 +1,13 @@
<?xml version="1.0" encoding="UTF-8"?>

<office:document xmlns:office="urn:oasis:names:tc:opendocument:xmlns:office:1.0" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:text="urn:oasis:names:tc:opendocument:xmlns:text:1.0" office:version="1.3" office:mimetype="application/vnd.oasis.opendocument.text">
 <office:body>
  <office:text>
   <text:p><text:a xlink:type="simple" xlink:href="#A_bookmark_name_longer_than_forty_characters">This is a hyperlink to the first target paragraph below</text:a></text:p>
   <text:p><text:a xlink:type="simple" xlink:href="#A_bookmark_name_longer_than_forty_characters2">This is a hyperlink to the second target paragraph below</text:a></text:p>
   <text:p/>
   <text:p><text:bookmark-start text:name="A_bookmark_name_longer_than_forty_characters"/>The first target paragraph with a bookmark with a very long name<text:bookmark-end text:name="A_bookmark_name_longer_than_forty_characters"/></text:p>
   <text:p><text:bookmark-start text:name="A_bookmark_name_longer_than_forty_characters2"/>The second target paragraph with a bookmark with a very long name<text:bookmark-end text:name="A_bookmark_name_longer_than_forty_characters2"/></text:p>
  </office:text>
 </office:body>
</office:document>
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport19.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport19.cxx
index 6e78419..69a3fc1 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport19.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport19.cxx
@@ -1059,6 +1059,37 @@ DECLARE_OOXMLEXPORT_TEST(testTdf156372, "tdf156372.doc")
    CPPUNIT_ASSERT_EQUAL(1, getPages());
}

CPPUNIT_TEST_FIXTURE(Test, testTdf156548)
{
    // The source document contains two bookmarks whose names are longer than 40 characters and
    // differ only after the 40th character, plus one hyperlink pointing to each of them.
    loadAndReload("longBookmarkName.fodt");

    // Expected state after export and reload:
    //  - no bookmark name is longer than 40 characters;
    //  - the two bookmark names are still different from each other;
    //  - every hyperlink uses the new name of its own target.

    {
        // Hyperlink in paragraph 1 ...
        const OUString sLinkTarget
            = getProperty<OUString>(getRun(getParagraph(1), 1), "HyperLinkURL");
        CPPUNIT_ASSERT_EQUAL(OUString("#A_bookmark_name_longer_than_forty_charac"), sLinkTarget);

        // ... must point to the bookmark in paragraph 4
        const uno::Reference<container::XNamed> xTarget
            = getProperty<uno::Reference<container::XNamed>>(getRun(getParagraph(4), 1),
                                                             "Bookmark");
        CPPUNIT_ASSERT_EQUAL(OUString("A_bookmark_name_longer_than_forty_charac"),
                             xTarget->getName());
    }

    {
        // Hyperlink in paragraph 2 ...
        const OUString sLinkTarget
            = getProperty<OUString>(getRun(getParagraph(2), 1), "HyperLinkURL");
        CPPUNIT_ASSERT_EQUAL(OUString("#A_bookmark_name_longer_than_forty_chara1"), sLinkTarget);

        // ... must point to the bookmark in paragraph 5
        const uno::Reference<container::XNamed> xTarget
            = getProperty<uno::Reference<container::XNamed>>(getRun(getParagraph(5), 1),
                                                             "Bookmark");
        CPPUNIT_ASSERT_EQUAL(OUString("A_bookmark_name_longer_than_forty_chara1"),
                             xTarget->getName());
    }
}

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx
index c97000c..ec55455 100644
--- a/sw/source/filter/ww8/docxattributeoutput.cxx
+++ b/sw/source/filter/ww8/docxattributeoutput.cxx
@@ -2170,11 +2170,11 @@ void DocxAttributeOutput::EndRun(const SwTextNode* pNode, sal_Int32 nPos, sal_In
    DoWriteBookmarkEndIfExist(nPos);
}

/// Writes a single XML_bookmarkStart element (namespace XML_w) through m_pSerializer.
/// Its XML_id attribute is the current value of m_nNextBookmarkId; its XML_name attribute is
/// bookmarkName after conversion by BookmarkToWord(). m_nNextBookmarkId is not modified here.
// NOTE(review): this looks like a diff hunk whose +/- markers were lost: the signature line and
// the XML_name argument line each appear twice, old version first and new version second.
void DocxAttributeOutput::DoWriteBookmarkTagStart(std::u16string_view bookmarkName)
void DocxAttributeOutput::DoWriteBookmarkTagStart(const OUString& bookmarkName)
{
    m_pSerializer->singleElementNS(XML_w, XML_bookmarkStart,
        FSNS(XML_w, XML_id), OString::number(m_nNextBookmarkId),
        FSNS(XML_w, XML_name), BookmarkToWord(bookmarkName));
        FSNS(XML_w, XML_name), GetExport().BookmarkToWord(bookmarkName));
}

void DocxAttributeOutput::DoWriteBookmarkTagEnd(sal_Int32 const nId)
@@ -2230,7 +2230,7 @@ void DocxAttributeOutput::DoWriteBookmarkStartIfExist(sal_Int32 nRunPos)
    {
        DoWriteBookmarkTagStart(aIter->second);
        m_rOpenedBookmarksIds[aIter->second] = m_nNextBookmarkId;
        m_sLastOpenedBookmark = BookmarkToWord(aIter->second);
        m_sLastOpenedBookmark = GetExport().BookmarkToWord(aIter->second);
        m_nNextBookmarkId++;
    }
}
@@ -2259,7 +2259,7 @@ void DocxAttributeOutput::DoWriteBookmarksStart(std::vector<OUString>& rStarts, 
        // Output the bookmark (including MoveBookmark of the tracked moving)
        bool bMove = false;
        bool bFrom = false;
        OUString sBookmarkName = BookmarkToWord(bookmarkName, &bMove, &bFrom);
        OUString sBookmarkName = GetExport().BookmarkToWord(bookmarkName, &bMove, &bFrom);
        if ( bMove )
        {
            // TODO: redline data of MoveBookmark is restored from the first redline of the bookmark
@@ -2291,7 +2291,7 @@ void DocxAttributeOutput::DoWriteBookmarksEnd(std::vector<OUString>& rEnds)
        {
            bool bMove = false;
            bool bFrom = false;
            BookmarkToWord(bookmarkName, &bMove, &bFrom);
            GetExport().BookmarkToWord(bookmarkName, &bMove, &bFrom);
            // Output the bookmark (including MoveBookmark of the tracked moving)
            if ( bMove )
                DoWriteMoveRangeTagEnd(pPos->second, bFrom);
@@ -2319,12 +2319,12 @@ void DocxAttributeOutput::DoWritePermissionTagStart(std::u16string_view permissi
    {
        const std::size_t separatorIndex = permissionIdAndName.find(u':');
        assert(separatorIndex != std::u16string_view::npos);
        const std::u16string_view permissionId   = permissionIdAndName.substr(0, separatorIndex);
        const std::u16string_view permissionName = permissionIdAndName.substr(separatorIndex + 1);
        const OUString permissionId(permissionIdAndName.substr(0, separatorIndex));
        const OUString permissionName(permissionIdAndName.substr(separatorIndex + 1));

        m_pSerializer->singleElementNS(XML_w, XML_permStart,
            FSNS(XML_w, XML_id), BookmarkToWord(permissionId),
            FSNS(XML_w, XML_edGrp), BookmarkToWord(permissionName));
            FSNS(XML_w, XML_id), GetExport().BookmarkToWord(permissionId),
            FSNS(XML_w, XML_edGrp), GetExport().BookmarkToWord(permissionName));
    }
    else
    {
@@ -2333,12 +2333,12 @@ void DocxAttributeOutput::DoWritePermissionTagStart(std::u16string_view permissi
        assert(ok); (void)ok;
        const std::size_t separatorIndex = permissionIdAndName.find(u':');
        assert(separatorIndex != std::u16string_view::npos);
        const std::u16string_view permissionId   = permissionIdAndName.substr(0, separatorIndex);
        const std::u16string_view permissionName = permissionIdAndName.substr(separatorIndex + 1);
        const OUString permissionId(permissionIdAndName.substr(0, separatorIndex));
        const OUString permissionName(permissionIdAndName.substr(separatorIndex + 1));

        m_pSerializer->singleElementNS(XML_w, XML_permStart,
            FSNS(XML_w, XML_id), BookmarkToWord(permissionId),
            FSNS(XML_w, XML_ed), BookmarkToWord(permissionName));
            FSNS(XML_w, XML_id), GetExport().BookmarkToWord(permissionId),
            FSNS(XML_w, XML_ed), GetExport().BookmarkToWord(permissionName));
    }
}

@@ -2360,10 +2360,10 @@ void DocxAttributeOutput::DoWritePermissionTagEnd(std::u16string_view permission

    const std::size_t separatorIndex = permissionIdAndName.find(u':');
    assert(separatorIndex != std::u16string_view::npos);
    const std::u16string_view permissionId   = permissionIdAndName.substr(0, separatorIndex);
    const OUString permissionId(permissionIdAndName.substr(0, separatorIndex));

    m_pSerializer->singleElementNS(XML_w, XML_permEnd,
        FSNS(XML_w, XML_id), BookmarkToWord(permissionId));
        FSNS(XML_w, XML_id), GetExport().BookmarkToWord(permissionId));
}

/// Write the start permissions
@@ -3848,6 +3848,8 @@ void DocxAttributeOutput::EndRuby(const SwTextNode& rNode, sal_Int32 nPos)
bool DocxAttributeOutput::AnalyzeURL( const OUString& rUrl, const OUString& rTarget, OUString* pLinkURL, OUString* pMark )
{
    bool bBookMarkOnly = AttributeOutputBase::AnalyzeURL( rUrl, rTarget, pLinkURL, pMark );
    if (bBookMarkOnly)
        *pMark = GetExport().BookmarkToWord(*pMark);

    if (!pMark->isEmpty() && (bBookMarkOnly || rTarget.isEmpty()))
    {
diff --git a/sw/source/filter/ww8/docxattributeoutput.hxx b/sw/source/filter/ww8/docxattributeoutput.hxx
index c0d2c56..e8b255b 100644
--- a/sw/source/filter/ww8/docxattributeoutput.hxx
+++ b/sw/source/filter/ww8/docxattributeoutput.hxx
@@ -770,7 +770,7 @@ protected:

private:

    void DoWriteBookmarkTagStart(std::u16string_view bookmarkName);
    void DoWriteBookmarkTagStart(const OUString& bookmarkName);
    void DoWriteBookmarkTagEnd(sal_Int32 nId);
    void DoWriteMoveRangeTagStart(std::u16string_view bookmarkName,
            bool bFrom, const SwRedlineData* pRedlineData);
diff --git a/sw/source/filter/ww8/wrtw8nds.cxx b/sw/source/filter/ww8/wrtw8nds.cxx
index 8b7a619..d2e5740 100644
--- a/sw/source/filter/ww8/wrtw8nds.cxx
+++ b/sw/source/filter/ww8/wrtw8nds.cxx
@@ -926,15 +926,6 @@ void WW8AttributeOutput::EndRuby(const SwTextNode& /*rNode*/, sal_Int32 /*nPos*/
    m_rWW8Export.OutputField( nullptr, ww::eEQ, OUString(), FieldFlags::End | FieldFlags::Close );
}

/*#i15387# Better ideas welcome*/
/// Cuts rRet down to its first 40 characters in place when it is longer than that, and returns
/// a reference to the same string. Strings of 40 characters or fewer are left untouched.
static OUString &TruncateBookmark( OUString &rRet )
{
    const bool bTooLong = rRet.getLength() > 40;
    if ( bTooLong )
    {
        rRet = rRet.copy( 0, 40 );
    }
    // Sanity check of the postcondition
    OSL_ENSURE( rRet.getLength() <= 40, "Word cannot have bookmarks longer than 40 chars" );
    return rRet;
}

OUString AttributeOutputBase::ConvertURL( const OUString& rUrl, bool bAbsoluteOut )
{
    OUString sURL = rUrl;
@@ -1029,7 +1020,7 @@ bool WW8AttributeOutput::AnalyzeURL( const OUString& rUrl, const OUString& rTarg
    if (bBookMarkOnly)
    {
        sURL = FieldString(ww::eHYPERLINK);
        *pMark = BookmarkToWord(*pMark);
        *pMark = GetExport().BookmarkToWord(*pMark);
    }
    else
        sURL = FieldString( ww::eHYPERLINK ) + "\"" + sURL + "\"";
@@ -1210,35 +1201,6 @@ bool WW8AttributeOutput::EndURL(bool const)
    return true;
}

/// Converts a Writer bookmark name into the form written to Word formats.
///
/// @param rBookmark the bookmark name to convert
/// @param pIsMove   when non-null, set to true if rBookmark starts with "__RefMoveFrom__" or
///                  "__RefMoveTo__"; the matching prefix is then dropped from the result.
///                  Not written when no prefix matches.
/// @param pIsFrom   written together with pIsMove (true for the "From" prefix, false for the
///                  "To" prefix); dereferenced without a null check whenever a prefix matches
/// @return the converted name, passed through TruncateBookmark()
OUString BookmarkToWord(std::u16string_view rBookmark, bool* pIsMove, bool* pIsFrom)
{
    // Number of leading characters ("__RefMove..." prefix) to leave out of the result
    sal_Int32 nTrim = 0;
    if ( pIsMove )
    {
        static constexpr OUStringLiteral MoveFrom_Bookmark_NamePrefix = u"__RefMoveFrom__";
        static constexpr OUStringLiteral MoveTo_Bookmark_NamePrefix = u"__RefMoveTo__";
        const bool bIsFrom = o3tl::starts_with(rBookmark, MoveFrom_Bookmark_NamePrefix);
        if ( bIsFrom || o3tl::starts_with(rBookmark, MoveTo_Bookmark_NamePrefix) )
        {
            *pIsMove = true;
            *pIsFrom = bIsFrom;
            nTrim = bIsFrom ? MoveFrom_Bookmark_NamePrefix.getLength()
                            : MoveTo_Bookmark_NamePrefix.getLength();
        }
    }

    // Spaces are prohibited in bookmark name
    const OUString sNoSpaces = OUString(rBookmark.substr(nTrim)).replace(' ', '_');
    const OUString sEncoded = INetURLObject::encode(
        sNoSpaces,
        INetURLObject::PART_REL_SEGMENT_EXTRA,
        INetURLObject::EncodeMechanism::All, RTL_TEXTENCODING_ASCII_US);
    // Unicode letters are allowed
    OUString sRet = INetURLObject::decode(sEncoded, INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8);
    return TruncateBookmark(sRet);
}

OUString BookmarkToWriter(std::u16string_view rBookmark)
{
    return INetURLObject::decode(rBookmark,
@@ -1248,7 +1210,7 @@ OUString BookmarkToWriter(std::u16string_view rBookmark)
/// Appends a bookmark for the given reference mark, but only when
/// m_rExport.HasRefToAttr() reports a reference to the mark's name. The bookmark name comes
/// from GetBookmarkName() with type REF_SETREFATTR, the mark's name and sequence number 0.
// NOTE(review): this looks like a diff hunk whose +/- markers were lost: the AppendBookmark
// line appears twice, old version (static call) first and new version (member call) second.
void SwWW8AttrIter::OutSwFormatRefMark(const SwFormatRefMark& rAttr)
{
    if(m_rExport.HasRefToAttr(rAttr.GetRefName()))
        m_rExport.AppendBookmark( MSWordExportBase::GetBookmarkName( REF_SETREFATTR,
        m_rExport.AppendBookmark( m_rExport.GetBookmarkName( REF_SETREFATTR,
                                            &rAttr.GetRefName(), 0 ));
}

diff --git a/sw/source/filter/ww8/wrtww8.cxx b/sw/source/filter/ww8/wrtww8.cxx
index 09855b3..c51b1c3 100644
--- a/sw/source/filter/ww8/wrtww8.cxx
+++ b/sw/source/filter/ww8/wrtww8.cxx
@@ -4565,4 +4565,53 @@ const NfKeywordTable & MSWordExportBase::GetNfKeywordTable()
    return *m_pKeyMap;
}

/// Converts a Writer bookmark name into a name acceptable for Word, and records the mapping in
/// m_aBookmarkToWord so that a later call with the same name returns the same result.
///
/// @param rBookmark the Writer-side bookmark name
/// @param pIsMove   when non-null, set to true if rBookmark starts with "__RefMoveFrom__" or
///                  "__RefMoveTo__"; the matching prefix is then dropped before conversion.
///                  Not written when no prefix matches.
/// @param pIsFrom   written together with pIsMove (true for the "From" prefix, false for the
///                  "To" prefix); dereferenced without a null check whenever a prefix matches
/// @return the converted name, never longer than 40 characters
OUString MSWordExportBase::BookmarkToWord(const OUString& rBookmark, bool* pIsMove, bool* pIsFrom)
{
    // Cache key: the bookmark name, minus a tracked-move prefix if one was recognized
    OUString sLookup = rBookmark;
    if (pIsMove)
    {
        static constexpr OUStringLiteral MoveFrom_Bookmark_NamePrefix = u"__RefMoveFrom__";
        static constexpr OUStringLiteral MoveTo_Bookmark_NamePrefix = u"__RefMoveTo__";
        // On a match, startsWith() stores the part after the prefix in sLookup
        const bool bIsFrom = rBookmark.startsWith(MoveFrom_Bookmark_NamePrefix, &sLookup);
        if (bIsFrom || rBookmark.startsWith(MoveTo_Bookmark_NamePrefix, &sLookup))
        {
            *pIsMove = true;
            *pIsFrom = bIsFrom;
        }
    }

    // This name was converted before: hand out the very same result again, so that a bookmark
    // and everything referring to it agree on the name
    const auto itKnown = m_aBookmarkToWord.find(sLookup);
    if (itKnown != m_aBookmarkToWord.end())
        return itKnown->second;

    // Spaces are prohibited in bookmark name
    const OUString sNoSpaces = sLookup.replace(' ', '_');
    const OUString sEncoded
        = INetURLObject::encode(sNoSpaces, INetURLObject::PART_REL_SEGMENT_EXTRA,
                                INetURLObject::EncodeMechanism::All, RTL_TEXTENCODING_ASCII_US);
    // Unicode letters are allowed
    OUString sRet = INetURLObject::decode(sEncoded, INetURLObject::DecodeMechanism::Unambiguous,
                                          RTL_TEXTENCODING_UTF8);

    /*#i15387#*/
    // Word has 40 character limit for bookmarks: [MS-OE376] Part 4 Sect. 2.13.6.2, bookmarkStart
    if (sRet.getLength() > 40)
    {
        // Start with the plain 40-character prefix. While that candidate is already taken,
        // overwrite its tail with a base-36 counter (1, 2, ..., z, 10, ...), keeping the total
        // length at 40. The counter wrapping around to 0 ends the search as a last resort.
        sRet = sRet.copy(0, 40);
        sal_uInt32 nCounter = 1;
        while (nCounter != 0 && m_aWordBookmarks.find(sRet) != m_aWordBookmarks.end())
        {
            auto aSuffix = OUString::number(nCounter, 36);
            sRet = sRet.subView(0, 40 - aSuffix.length) + aSuffix;
            ++nCounter;
        }
    }

    // Remember both directions: source name -> Word name, and the set of Word names in use
    m_aBookmarkToWord[sLookup] = sRet;
    m_aWordBookmarks.insert(sRet);
    return sRet;
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/ww8/wrtww8.hxx b/sw/source/filter/ww8/wrtww8.hxx
index 7266bb8..3aa39a2 100644
--- a/sw/source/filter/ww8/wrtww8.hxx
+++ b/sw/source/filter/ww8/wrtww8.hxx
@@ -51,6 +51,7 @@
#include <map>
#include <string_view>
#include <vector>
#include <unordered_map>


class SvxBrushItem;
@@ -639,7 +640,7 @@ public:
    bool HasRefToFootOrEndnote(const bool isEndNote, const sal_uInt16 nSeqNo);

    /// Find the bookmark name.
    static OUString GetBookmarkName( sal_uInt16 nTyp, const OUString* pName, sal_uInt16 nSeqNo );
    OUString GetBookmarkName( sal_uInt16 nTyp, const OUString* pName, sal_uInt16 nSeqNo );

    /// Use OutputItem() on an item set according to the parameters.
    void OutputItemSet( const SfxItemSet& rSet, bool bPapFormat, bool bChpFormat, sal_uInt16 nScript, bool bExportParentItemSet );
@@ -929,9 +930,15 @@ public:

    static void CorrectTabStopInSet( SfxItemSet& rSet, sal_Int32 nAbsLeft );

    OUString BookmarkToWord(const OUString& rBookmark, bool* pIsMove = nullptr,
                            bool* pIsFrom = nullptr);

private:
    MSWordExportBase( const MSWordExportBase& ) = delete;
    MSWordExportBase& operator=( const MSWordExportBase& ) = delete;

    std::unordered_map<OUString, OUString> m_aBookmarkToWord;
    o3tl::sorted_vector<OUString> m_aWordBookmarks;
};

/// The writer class that gets called for the WW8 filter.
@@ -1654,7 +1661,6 @@ public:
sal_Int16 GetWordFirstLineOffset(const SwNumFormat &rFormat);
// A bit of a bag on the side for now
OUString FieldString(ww::eField eIndex);
OUString BookmarkToWord(std::u16string_view rBookmark, bool* pIsMove = nullptr, bool* pIsFrom = nullptr);

class WW8SHDLong
{
diff --git a/sw/source/filter/ww8/ww8atr.cxx b/sw/source/filter/ww8/ww8atr.cxx
index 30df5b2..a794a5d 100644
--- a/sw/source/filter/ww8/ww8atr.cxx
+++ b/sw/source/filter/ww8/ww8atr.cxx
@@ -1149,7 +1149,7 @@ void WW8AttributeOutput::StartRun( const SwRedlineData* pRedlineData, sal_Int32 
    auto aRange = m_aBookmarksOfParagraphStart.equal_range(nPos);
    for( auto aIter = aRange.first; aIter != aRange.second; ++aIter)
    {
        GetExport().AppendBookmark(BookmarkToWord(aIter->second));
        GetExport().AppendBookmark(GetExport().BookmarkToWord(aIter->second));
    }
}

@@ -1165,9 +1165,9 @@ void WW8AttributeOutput::EndRun( const SwTextNode* /*pNode*/, sal_Int32 nPos, sa
    for( auto aIter = aRange.first; aIter != aRange.second; ++aIter)
    {
        if(bLastRun)
            GetExport().AppendBookmarkEndWithCorrection(BookmarkToWord(aIter->second));
            GetExport().AppendBookmarkEndWithCorrection(GetExport().BookmarkToWord(aIter->second));
        else
            GetExport().AppendBookmark(BookmarkToWord(aIter->second));
            GetExport().AppendBookmark(GetExport().BookmarkToWord(aIter->second));
    }
}

@@ -3229,7 +3229,7 @@ void AttributeOutputBase::TextField( const SwFormatField& rField )
                    {
                        const OUString& aRefName(rRField.GetSetRefName());
                        sStr = FieldString(eField)
                            + MSWordExportBase::GetBookmarkName(nSubType, &aRefName, 0);
                               + GetExport().GetBookmarkName(nSubType, &aRefName, 0);
                    }
                    switch (pField->GetFormat())
                    {
@@ -3284,7 +3284,7 @@ void AttributeOutputBase::TextField( const SwFormatField& rField )
                                    eField = ww::eNONE;
                                    break;
                        }
                        sStr = FieldString(eField) + MSWordExportBase::GetBookmarkName(nSubType, &sName, 0);
                        sStr = FieldString(eField) + GetExport().GetBookmarkName(nSubType, &sName, 0);
                    }
                    switch (pField->GetFormat())
                    {
@@ -3317,7 +3317,7 @@ void AttributeOutputBase::TextField( const SwFormatField& rField )
                            break;
                    }
                    sStr = FieldString(eField)
                        + MSWordExportBase::GetBookmarkName(nSubType, nullptr, rRField.GetSeqNo());
                           + GetExport().GetBookmarkName(nSubType, nullptr, rRField.GetSeqNo());
                    break;
            }

@@ -3695,7 +3695,7 @@ void AttributeOutputBase::TextFootnote( const SwFormatFootnote& rFootnote )
    OUString sBkmkNm;
    if ( GetExport().HasRefToFootOrEndnote( rFootnote.IsEndNote(), rFootnote.GetTextFootnote()->GetSeqRefNo()))
    {
        sBkmkNm = MSWordExportBase::GetBookmarkName( nTyp, nullptr,
        sBkmkNm = GetExport().GetBookmarkName(nTyp, nullptr,
                                    rFootnote.GetTextFootnote()->GetSeqRefNo() );
        GetExport().AppendBookmark( sBkmkNm );
    }