tdf#137894 separate associated character properties

in ww8filter/RtfAttributeOutput and treat \dbch as
CJK and \hich as Western in order to roundtrip the
RTF document.

ww8filter mixes all the associated styles together, including
properties for CJK and CTL scripts.

Both RtfAttributeOutput::CharFontCJK and
RtfAttributeOutput::CharFontCTL output \dbch,
which results in an incorrect association.

CharFontCTL should use \rtlch, but it was already in
RtfAttributeOutput::MoveCharacterProperties.

To make the order correct, I separate the
associated character properties that were
stored in m_aStylesAssoc into m_aStylesAssocRtlch
and m_aStylesAssocDbch by their script types.

Note that it is not clear which associated character
properties we should adopt for \hich and \ltrch.

In theory RTL scripts can output high ANSI chars too,
so \hich may get properties from either Western or
CTL scripts. However, examining Hebrew RTF documents,
I didn't see any sign that \hich is used in that way.

Using RTL as CTL might be a problem for Mongolian,
Manchu and Xibe. They are CTL but top-to-bottom (aka LTR).
But I don't think they will be expressed
as high ANSI chars either.

Change-Id: I214edbb00a67c2ffe19c5a37254c8988a0828f40
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/106355
Tested-by: Jenkins
Reviewed-by: Miklos Vajna <vmiklos@collabora.com>
diff --git a/sw/qa/extras/rtfexport/data/tdf137894.odt b/sw/qa/extras/rtfexport/data/tdf137894.odt
new file mode 100755
index 0000000..79c7eb1
--- /dev/null
+++ b/sw/qa/extras/rtfexport/data/tdf137894.odt
Binary files differ
diff --git a/sw/qa/extras/rtfexport/rtfexport5.cxx b/sw/qa/extras/rtfexport/rtfexport5.cxx
index a48adfa6..0d921ed 100644
--- a/sw/qa/extras/rtfexport/rtfexport5.cxx
+++ b/sw/qa/extras/rtfexport/rtfexport5.cxx
@@ -1260,6 +1260,22 @@ DECLARE_RTFEXPORT_TEST(testTdf138210, "tdf138210.rtf")
    CPPUNIT_ASSERT_EQUAL(sal_Int32(1), xIndexAccess->getCount());
}

DECLARE_RTFEXPORT_TEST(testTdf137894, "tdf137894.odt")
{
    lang::Locale locale1(getProperty<lang::Locale>(getRun(getParagraph(1), 1), "CharLocaleAsian"));
    CPPUNIT_ASSERT_EQUAL(OUString("ja"), locale1.Language);
    CPPUNIT_ASSERT_EQUAL(OUString("MS UI Gothic"),
                         getProperty<OUString>(getRun(getParagraph(1), 1), "CharFontNameAsian"));
    CPPUNIT_ASSERT_EQUAL(20.f, getProperty<float>(getRun(getParagraph(1), 1), "CharHeightAsian"));
    CPPUNIT_ASSERT_EQUAL(OUString("Mangal"),
                         getProperty<OUString>(getRun(getParagraph(1), 1), "CharFontNameComplex"));
    CPPUNIT_ASSERT_EQUAL(20.f, getProperty<float>(getRun(getParagraph(1), 1), "CharHeightComplex"));
    lang::Locale locale2(
        getProperty<lang::Locale>(getRun(getParagraph(2), 1), "CharLocaleComplex"));
    CPPUNIT_ASSERT_EQUAL(OUString("he"), locale2.Language);
    CPPUNIT_ASSERT_EQUAL(32.f, getProperty<float>(getRun(getParagraph(2), 1), "CharHeightComplex"));
}

CPPUNIT_PLUGIN_IMPLEMENT();

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/sw/source/filter/ww8/rtfattributeoutput.cxx b/sw/source/filter/ww8/rtfattributeoutput.cxx
index 9e76b3d..23dc3af 100644
--- a/sw/source/filter/ww8/rtfattributeoutput.cxx
+++ b/sw/source/filter/ww8/rtfattributeoutput.cxx
@@ -423,50 +423,59 @@ void RtfAttributeOutput::EndRunProperties(const SwRedlineData* /*pRedlineData*/)

OString RtfAttributeOutput::MoveCharacterProperties(bool aAutoWriteRtlLtr)
{
    const OString aAssoc = m_aStylesAssoc.makeStringAndClear();
    const OString aAssocHich = m_aStylesAssocHich.makeStringAndClear();
    const OString aAssocDbch = m_aStylesAssocDbch.makeStringAndClear();
    const OString aAssocRtlch = m_aStylesAssocRtlch.makeStringAndClear();
    const OString aAssocLtrch = m_aStylesAssocLtrch.makeStringAndClear();
    const OString aNormal = m_aStyles.makeStringAndClear();
    OStringBuffer aBuf;

    if (aAutoWriteRtlLtr && !m_bControlLtrRtl)
    {
        m_bControlLtrRtl = !aAssoc.isEmpty();
        m_bControlLtrRtl = !aAssocRtlch.isEmpty();
        m_bIsRTL = false;
        m_nScript = i18n::ScriptType::LATIN;
    }

    if (m_bIsRTL)
    {
        if (!aAssocRtlch.isEmpty())
        {
            aBuf.append(OOO_STRING_SVTOOLS_RTF_LTRCH)
                .append(aAssocLtrch)
                .append(' ')
                .append(OOO_STRING_SVTOOLS_RTF_RTLCH)
                .append(aAssocRtlch);
        }
    }
    else
    {
        if (!aAssocRtlch.isEmpty())
        {
            aBuf.append(OOO_STRING_SVTOOLS_RTF_RTLCH)
                .append(aAssocRtlch)
                .append(' ')
                .append(OOO_STRING_SVTOOLS_RTF_LTRCH)
                .append(aAssocLtrch);
        }
        if (!aAssocHich.isEmpty())
        {
            aBuf.append(OOO_STRING_SVTOOLS_RTF_HICH).append(aAssocHich);
        }
        if (!aNormal.isEmpty())
        {
            aBuf.append(OOO_STRING_SVTOOLS_RTF_LOCH).append(aNormal);
        }
        if (!aAssocDbch.isEmpty())
        {
            aBuf.append(OOO_STRING_SVTOOLS_RTF_DBCH).append(aAssocDbch);
        }
    }

    if (m_bControlLtrRtl)
    {
        m_bControlLtrRtl = false;

        /*
           You would have thought that
           m_rExport.Strm() << (bIsRTL ? OOO_STRING_SVTOOLS_RTF_RTLCH : OOO_STRING_SVTOOLS_RTF_LTRCH); would be sufficient here ,
           but looks like word needs to see the other directional token to be
           satisfied that all is kosher, otherwise it seems in ver 2003 to go and
           semi-randomly stick strike through about the place. Perhaps
           strikethrough is some ms developers "something is wrong signal" debugging
           code that we're triggering ?
           */
        if (!aAssoc.isEmpty() || !aNormal.isEmpty())
        {
            if (m_bIsRTL)
            {
                aBuf.append(OOO_STRING_SVTOOLS_RTF_LTRCH)
                    .append(aAssoc)
                    .append(' ')
                    .append(OOO_STRING_SVTOOLS_RTF_RTLCH)
                    .append(aNormal);
            }
            else
            {
                aBuf.append(OOO_STRING_SVTOOLS_RTF_RTLCH)
                    .append(aAssoc)
                    .append(' ')
                    .append(OOO_STRING_SVTOOLS_RTF_LTRCH)
                    .append(aNormal);
            }
        }

        switch (m_nScript)
        {
            case i18n::ScriptType::LATIN:
@@ -482,10 +491,6 @@ OString RtfAttributeOutput::MoveCharacterProperties(bool aAutoWriteRtlLtr)
                break;
        }
    }
    else
    {
        aBuf.append(aAssoc).append(aNormal);
    }

    return aBuf.makeStringAndClear();
}
@@ -2454,18 +2459,13 @@ void RtfAttributeOutput::CharEscapement(const SvxEscapementItem& rEscapement)

void RtfAttributeOutput::CharFont(const SvxFontItem& rFont)
{
    m_aStyles.append(OOO_STRING_SVTOOLS_RTF_LOCH);
    // Insert \loch in MoveCharacterProperties
    m_aStyles.append(OOO_STRING_SVTOOLS_RTF_F);
    m_aStyles.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));

    if (!m_rExport.HasItem(RES_CHRATR_CJK_FONT) && !m_rExport.HasItem(RES_CHRATR_CTL_FONT))
    {
        // Be explicit about that the given font should be used everywhere, not
        // just for the loch range.
        m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_HICH);
        m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_AF);
        m_aStylesAssoc.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));
    }
    // Insert \hich in MoveCharacterProperties
    m_aStylesAssocHich.append(OOO_STRING_SVTOOLS_RTF_AF);
    m_aStylesAssocHich.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));

    // FIXME: this may be a tad expensive... but the charset needs to be
    // consistent with what wwFont::WriteRtf() does
@@ -2486,12 +2486,12 @@ void RtfAttributeOutput::CharFontSize(const SvxFontHeightItem& rFontSize)
            m_aStyles.append(static_cast<sal_Int32>(rFontSize.GetHeight() / 10));
            break;
        case RES_CHRATR_CJK_FONTSIZE:
            m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_FS);
            m_aStylesAssoc.append(static_cast<sal_Int32>(rFontSize.GetHeight() / 10));
            m_aStylesAssocDbch.append(OOO_STRING_SVTOOLS_RTF_AFS);
            m_aStylesAssocDbch.append(static_cast<sal_Int32>(rFontSize.GetHeight() / 10));
            break;
        case RES_CHRATR_CTL_FONTSIZE:
            m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_AFS);
            m_aStylesAssoc.append(static_cast<sal_Int32>(rFontSize.GetHeight() / 10));
            m_aStylesAssocRtlch.append(OOO_STRING_SVTOOLS_RTF_AFS);
            m_aStylesAssocRtlch.append(static_cast<sal_Int32>(rFontSize.GetHeight() / 10));
            break;
    }
}
@@ -2513,15 +2513,21 @@ void RtfAttributeOutput::CharLanguage(const SvxLanguageItem& rLanguage)
            m_aStyles.append(OOO_STRING_SVTOOLS_RTF_LANG);
            m_aStyles.append(
                static_cast<sal_Int32>(static_cast<sal_uInt16>(rLanguage.GetLanguage())));
            m_aStylesAssocLtrch.append(OOO_STRING_SVTOOLS_RTF_LANG);
            m_aStylesAssocLtrch.append(
                static_cast<sal_Int32>(static_cast<sal_uInt16>(rLanguage.GetLanguage())));
            break;
        case RES_CHRATR_CJK_LANGUAGE:
            m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_LANGFE);
            m_aStylesAssoc.append(
            m_aStylesAssocDbch.append(OOO_STRING_SVTOOLS_RTF_LANGFE);
            m_aStylesAssocDbch.append(
                static_cast<sal_Int32>(static_cast<sal_uInt16>(rLanguage.GetLanguage())));
            m_aStylesAssocLtrch.append(OOO_STRING_SVTOOLS_RTF_LANGFE);
            m_aStylesAssocLtrch.append(
                static_cast<sal_Int32>(static_cast<sal_uInt16>(rLanguage.GetLanguage())));
            break;
        case RES_CHRATR_CTL_LANGUAGE:
            m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_ALANG);
            m_aStylesAssoc.append(
            m_aStylesAssocRtlch.append(OOO_STRING_SVTOOLS_RTF_ALANG);
            m_aStylesAssocRtlch.append(
                static_cast<sal_Int32>(static_cast<sal_uInt16>(rLanguage.GetLanguage())));
            break;
    }
@@ -2645,9 +2651,9 @@ void RtfAttributeOutput::CharBackground(const SvxBrushItem& rBrush)

void RtfAttributeOutput::CharFontCJK(const SvxFontItem& rFont)
{
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_DBCH);
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_AF);
    m_aStylesAssoc.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));
    // Insert \dbch in MoveCharacterProperties
    m_aStylesAssocDbch.append(OOO_STRING_SVTOOLS_RTF_AF);
    m_aStylesAssocDbch.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));
}

void RtfAttributeOutput::CharFontSizeCJK(const SvxFontHeightItem& rFontSize)
@@ -2662,23 +2668,23 @@ void RtfAttributeOutput::CharLanguageCJK(const SvxLanguageItem& rLanguageItem)

void RtfAttributeOutput::CharPostureCJK(const SvxPostureItem& rPosture)
{
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_I);
    m_aStylesAssocDbch.append(OOO_STRING_SVTOOLS_RTF_I);
    if (rPosture.GetPosture() == ITALIC_NONE)
        m_aStylesAssoc.append(sal_Int32(0));
        m_aStylesAssocDbch.append(sal_Int32(0));
}

void RtfAttributeOutput::CharWeightCJK(const SvxWeightItem& rWeight)
{
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_B);
    m_aStylesAssocDbch.append(OOO_STRING_SVTOOLS_RTF_AB);
    if (rWeight.GetWeight() != WEIGHT_BOLD)
        m_aStylesAssoc.append(sal_Int32(0));
        m_aStylesAssocDbch.append(sal_Int32(0));
}

void RtfAttributeOutput::CharFontCTL(const SvxFontItem& rFont)
{
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_DBCH);
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_AF);
    m_aStylesAssoc.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));
    // Insert \rtlch in MoveCharacterProperties
    m_aStylesAssocRtlch.append(OOO_STRING_SVTOOLS_RTF_AF);
    m_aStylesAssocRtlch.append(static_cast<sal_Int32>(m_rExport.m_aFontHelper.GetId(rFont)));
}

void RtfAttributeOutput::CharFontSizeCTL(const SvxFontHeightItem& rFontSize)
@@ -2693,16 +2699,16 @@ void RtfAttributeOutput::CharLanguageCTL(const SvxLanguageItem& rLanguageItem)

void RtfAttributeOutput::CharPostureCTL(const SvxPostureItem& rPosture)
{
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_AI);
    m_aStylesAssocRtlch.append(OOO_STRING_SVTOOLS_RTF_AI);
    if (rPosture.GetPosture() == ITALIC_NONE)
        m_aStylesAssoc.append(sal_Int32(0));
        m_aStylesAssocRtlch.append(sal_Int32(0));
}

void RtfAttributeOutput::CharWeightCTL(const SvxWeightItem& rWeight)
{
    m_aStylesAssoc.append(OOO_STRING_SVTOOLS_RTF_AB);
    m_aStylesAssocRtlch.append(OOO_STRING_SVTOOLS_RTF_AB);
    if (rWeight.GetWeight() != WEIGHT_BOLD)
        m_aStylesAssoc.append(sal_Int32(0));
        m_aStylesAssocRtlch.append(sal_Int32(0));
}

void RtfAttributeOutput::CharBidiRTL(const SfxPoolItem& /*rItem*/) {}
diff --git a/sw/source/filter/ww8/rtfattributeoutput.hxx b/sw/source/filter/ww8/rtfattributeoutput.hxx
index 2493ce3..246ab4a 100644
--- a/sw/source/filter/ww8/rtfattributeoutput.hxx
+++ b/sw/source/filter/ww8/rtfattributeoutput.hxx
@@ -525,7 +525,11 @@ private:
    /*
     * This is the same as m_aStyles but the contents of it is Assoc.
     */
    OStringBuffer m_aStylesAssoc;
    OStringBuffer m_aStylesAssocHich;
    OStringBuffer m_aStylesAssocDbch;
    OStringBuffer m_aStylesAssocRtlch;
    OStringBuffer m_aStylesAssocLtrch;

    bool m_bIsRTL;
    sal_uInt16 m_nScript;
    bool m_bControlLtrRtl;
diff --git a/writerfilter/source/rtftok/rtfdispatchvalue.cxx b/writerfilter/source/rtftok/rtfdispatchvalue.cxx
index b43e85a..930c613 100644
--- a/writerfilter/source/rtftok/rtfdispatchvalue.cxx
+++ b/writerfilter/source/rtftok/rtfdispatchvalue.cxx
@@ -164,13 +164,13 @@ bool RTFDocumentImpl::dispatchCharacterSprmValue(RTFKeyword nKeyword, int nParam
                case RTFParserState::RunType::HICH:
                case RTFParserState::RunType::RTLCH_LTRCH_1:
                case RTFParserState::RunType::LTRCH_RTLCH_2:
                case RTFParserState::RunType::DBCH:
                    nSprm = NS_ooxml::LN_EG_RPrBase_szCs;
                    break;
                case RTFParserState::RunType::NONE:
                case RTFParserState::RunType::LOCH:
                case RTFParserState::RunType::LTRCH_RTLCH_1:
                case RTFParserState::RunType::RTLCH_LTRCH_2:
                case RTFParserState::RunType::DBCH:
                default:
                    nSprm = NS_ooxml::LN_EG_RPrBase_sz;
                    break;
@@ -762,7 +762,6 @@ RTFError RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
        case RTF_AF:
            switch (m_aStates.top().getRunType())
            {
                case RTFParserState::RunType::HICH:
                case RTFParserState::RunType::RTLCH_LTRCH_1:
                case RTFParserState::RunType::LTRCH_RTLCH_2:
                    nSprm = NS_ooxml::LN_CT_Fonts_cs;
@@ -772,6 +771,7 @@ RTFError RTFDocumentImpl::dispatchValue(RTFKeyword nKeyword, int nParam)
                    break;
                case RTFParserState::RunType::NONE:
                case RTFParserState::RunType::LOCH:
                case RTFParserState::RunType::HICH:
                case RTFParserState::RunType::LTRCH_RTLCH_1:
                case RTFParserState::RunType::RTLCH_LTRCH_2:
                default: