tdf#125298 DOCX export: fix bookmark name truncation

The 40-character bookmark name truncation for MSO was
applied on percent encoded Unicode letters, resulting
shorter bookmark names, than needed (for example,
only 6 letters instead of 40).

This is a clean-up of the commit b9afb9959c31c3c57d0f2fe91107a92abfd82cdb
"tdf#113483: DOCX: fix encoding of bookmarks with non-ASCII letters".

Change-Id: I6fbcdc47c57ef228abcf29e0a68d3d8bb66c5661
Reviewed-on: https://gerrit.libreoffice.org/72456
Tested-by: László Németh <nemeth@numbertext.org>
Tested-by: Jenkins
Reviewed-by: László Németh <nemeth@numbertext.org>
diff --git a/sw/qa/extras/ooxmlexport/data/tdf125298_crossreflink_nonascii_charlimit.docx b/sw/qa/extras/ooxmlexport/data/tdf125298_crossreflink_nonascii_charlimit.docx
new file mode 100644
index 0000000..d157399
--- /dev/null
+++ b/sw/qa/extras/ooxmlexport/data/tdf125298_crossreflink_nonascii_charlimit.docx
Binary files differ
diff --git a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
index ff2030d..b056ad1 100644
--- a/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
+++ b/sw/qa/extras/ooxmlexport/ooxmlexport13.cxx
@@ -277,10 +277,35 @@ DECLARE_OOXMLEXPORT_TEST(testTdf113483, "tdf113483_crossreflink_nonascii_bookmar
    xmlDocPtr pXmlDoc = parseExport("word/document.xml");
    if (!pXmlDoc)
        return;
    // check whether test file keeps non-ascii values or not
    assertXPath(pXmlDoc, "/w:document/w:body/w:p[1]/w:bookmarkStart[1]", "name", OUString::fromUtf8("Els\u0151"));
    assertXPathContent(pXmlDoc, "/w:document/w:body/w:p[5]/w:r[2]/w:instrText[1]", OUString::fromUtf8(" REF Els\u0151 \\h "));
}

//tdf#125298: fix charlimit restrictions in bookmarknames and field references if they contain non-ascii characters
DECLARE_OOXMLEXPORT_TEST(testTdf125298, "tdf125298_crossreflink_nonascii_charlimit.docx")
{
    xmlDocPtr pXmlDoc = parseExport("word/document.xml");
    if (!pXmlDoc)
        return;
    // check whether test file keeps non-ascii values or not
    OUString bookmarkName1 = getXPath(pXmlDoc, "/w:document/w:body/w:p[1]/w:bookmarkStart[1]", "name");
    CPPUNIT_ASSERT_EQUAL(OUString::fromUtf8("\u00e1rv\u00edzt\u0171r\u0151_t\u00fck\u00f6rf\u00far\u00f3g\u00e9p"), bookmarkName1);

    OUString bookmarkName2 = getXPath(pXmlDoc, "/w:document/w:body/w:p[3]/w:bookmarkStart[1]", "name");
    CPPUNIT_ASSERT_EQUAL(OUString::fromUtf8("\u00e91\u00e12\u01713\u01514\u00fa5\u00f66\u00fc7\u00f38\u00ed9"), bookmarkName2);
    OUString fieldName1 = getXPathContent(pXmlDoc, "/w:document/w:body/w:p[5]/w:r[2]/w:instrText[1]");
    OUString expectedFieldName1(" REF ");
    expectedFieldName1 += bookmarkName1;
    expectedFieldName1 += " \\h ";
    CPPUNIT_ASSERT_EQUAL(expectedFieldName1, fieldName1);
    OUString fieldName2 = getXPathContent(pXmlDoc, "/w:document/w:body/w:p[7]/w:r[2]/w:instrText[1]");
    OUString expectedFieldName2(" REF ");
    expectedFieldName2 += bookmarkName2;
    expectedFieldName2 += " \\h ";
    CPPUNIT_ASSERT_EQUAL(expectedFieldName2, fieldName2);
}

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/ww8/docxattributeoutput.cxx b/sw/source/filter/ww8/docxattributeoutput.cxx
index d351101..f47d539 100644
--- a/sw/source/filter/ww8/docxattributeoutput.cxx
+++ b/sw/source/filter/ww8/docxattributeoutput.cxx
@@ -1612,7 +1612,7 @@ void DocxAttributeOutput::DoWriteBookmarkTagStart(const OUString & bookmarkName)
{
    m_pSerializer->singleElementNS(XML_w, XML_bookmarkStart,
        FSNS(XML_w, XML_id), OString::number(m_nNextBookmarkId),
        FSNS(XML_w, XML_name), INetURLObject::decode(BookmarkToWord(bookmarkName), INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8).toUtf8());
        FSNS(XML_w, XML_name), BookmarkToWord(bookmarkName).toUtf8());
}

void DocxAttributeOutput::DoWriteBookmarkTagEnd(const OUString & bookmarkName)
@@ -1981,12 +1981,6 @@ void DocxAttributeOutput::CmdField_Impl( const SwTextNode* pNode, sal_Int32 nPos
               sToken = sToken.replaceAll("NNNN", "dddd");
               sToken = sToken.replaceAll("NN", "ddd");
            }
            //tdf#113483: fix non-ascii characters inside instrText xml tags
            else if ( rInfos.eType ==  ww::eREF
              || rInfos.eType ==  ww::ePAGEREF )
            {
                sToken = INetURLObject::decode(sToken, INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8);
            }

            // Write the Field command
            DoWriteCmd( sToken );
diff --git a/sw/source/filter/ww8/wrtw8nds.cxx b/sw/source/filter/ww8/wrtw8nds.cxx
index 5ea8e4a..e5b8ece7 100644
--- a/sw/source/filter/ww8/wrtw8nds.cxx
+++ b/sw/source/filter/ww8/wrtw8nds.cxx
@@ -1157,6 +1157,8 @@ OUString BookmarkToWord(const OUString &rBookmark)
        rBookmark.replace(' ', '_'), // Spaces are prohibited in bookmark name
        INetURLObject::PART_REL_SEGMENT_EXTRA,
        INetURLObject::EncodeMechanism::All, RTL_TEXTENCODING_ASCII_US));
    // Unicode letters are allowed
    sRet = INetURLObject::decode(sRet, INetURLObject::DecodeMechanism::Unambiguous, RTL_TEXTENCODING_UTF8);
    return TruncateBookmark(sRet);
}