tdf#74670 tdf#91286 PPTX XLSX export: save image once

Impress and Calc used to dump the same image file
as many times as it was featured in the document,
resulting redundant, sometimes huge documents.

Note: using only checksum to recognize image duplication
is a regression, because checksum collision results
image loss. This is a very unlikely event, and
the following commits have got the same problem.
The solution is comparing the images with the same
checksum byte for byte.

See also commit b484e9814c66d8d51cea974390963a6944bc9d73
"tdf#83227 oox: reuse RelId in DML/VML export for the same graphic"
and commit 797fef38612fb2fd62d1f6591619b9361e526bca
"tdf#118535 DOCX export: save header image once".

Change-Id: I9f233d521941381746634cf4f9b5991da0dadda9
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/131928
Tested-by: László Németh <nemeth@numbertext.org>
Reviewed-by: László Németh <nemeth@numbertext.org>
diff --git a/include/oox/export/drawingml.hxx b/include/oox/export/drawingml.hxx
index fb125dd..9a7f744 100644
--- a/include/oox/export/drawingml.hxx
+++ b/include/oox/export/drawingml.hxx
@@ -21,7 +21,9 @@
#define INCLUDED_OOX_EXPORT_DRAWINGML_HXX

#include <map>
#include <stack>
#include <string_view>
#include <unordered_map>
#include <vector>

#include <com/sun/star/beans/PropertyState.hpp>
@@ -41,6 +43,7 @@
#include <sax/fshelper.hxx>
#include <svx/msdffdef.hxx>
#include <vcl/checksum.hxx>
#include <vcl/graph.hxx>
#include <tools/gen.hxx>
#include <tools/color.hxx>
#include <vcl/mapmod.hxx>
@@ -150,6 +153,7 @@ private:
    static std::map<OUString, OUString> maWdpCache;
    static sal_Int32 mnDrawingMLCount;
    static sal_Int32 mnVmlCount;
    static std::stack<std::unordered_map<BitmapChecksum, OUString>> maExportGraphics;

    /// To specify where write eg. the images to (like 'ppt', or 'word' - according to the OPC).
    DocumentType meDocumentType;
@@ -342,9 +346,11 @@ public:
    sal_Int32 getBulletMarginIndentation (const css::uno::Reference< css::beans::XPropertySet >& rXPropSet,sal_Int16 nLevel, std::u16string_view propName);

    static void ResetCounters();

    static void ResetMlCounters();

    static void PushExportGraphics();
    static void PopExportGraphics();

    static sal_Int32 getNewDrawingUniqueId() { return ++mnDrawingMLCount; }
    static sal_Int32 getNewVMLUniqueId() { return ++mnVmlCount; }

diff --git a/oox/source/export/drawingml.cxx b/oox/source/export/drawingml.cxx
index a790a64..a99a047 100644
--- a/oox/source/export/drawingml.cxx
+++ b/oox/source/export/drawingml.cxx
@@ -111,7 +111,6 @@
#include <tools/stream.hxx>
#include <unotools/fontdefs.hxx>
#include <vcl/cvtgrf.hxx>
#include <vcl/graph.hxx>
#include <vcl/svapp.hxx>
#include <rtl/strbuf.hxx>
#include <filter/msfilter/escherex.hxx>
@@ -237,6 +236,7 @@ int DrawingML::mnWdpImageCounter = 1;
std::map<OUString, OUString> DrawingML::maWdpCache;
sal_Int32 DrawingML::mnDrawingMLCount = 0;
sal_Int32 DrawingML::mnVmlCount = 0;
std::stack<std::unordered_map<BitmapChecksum, OUString>> DrawingML::maExportGraphics;

sal_Int16 DrawingML::GetScriptType(const OUString& rStr)
{
@@ -275,6 +275,16 @@ void DrawingML::ResetMlCounters()
    mnVmlCount = 0;
}

void DrawingML::PushExportGraphics()
{
    maExportGraphics.emplace();
}

void DrawingML::PopExportGraphics()
{
    maExportGraphics.pop();
}

bool DrawingML::GetProperty( const Reference< XPropertySet >& rXPropertySet, const OUString& aName )
{
    try
@@ -1264,113 +1274,130 @@ const char* DrawingML::GetRelationCompPrefix() const
OUString DrawingML::WriteImage( const Graphic& rGraphic , bool bRelPathToMedia, OUString* pFileName )
{
    GfxLink aLink = rGraphic.GetGfxLink ();
    BitmapChecksum aChecksum = rGraphic.GetChecksum();
    OUString sMediaType;
    const char* pExtension = "";
    OUString sRelId;
    OUString sPath;

    SvMemoryStream aStream;
    const void* aData = aLink.GetData();
    std::size_t nDataSize = aLink.GetDataSize();

    switch ( aLink.GetType() )
    // tdf#74670 tdf#91286 Save image only once (this is no problem for DOCX)
    if (GetDocumentType() != DOCUMENT_DOCX && !maExportGraphics.empty())
    {
        case GfxLinkType::NativeGif:
            sMediaType = "image/gif";
            pExtension = ".gif";
            break;
        auto aIterator = maExportGraphics.top().find(aChecksum);
        if (aIterator != maExportGraphics.top().end())
            sPath = aIterator->second;
    }

        // #i15508# added BMP type for better exports
        // export not yet active, so adding for reference (not checked)
        case GfxLinkType::NativeBmp:
            sMediaType = "image/bmp";
            pExtension = ".bmp";
            break;
    if (sPath.isEmpty())
    {
        SvMemoryStream aStream;
        const void* aData = aLink.GetData();
        std::size_t nDataSize = aLink.GetDataSize();

        case GfxLinkType::NativeJpg:
            sMediaType = "image/jpeg";
            pExtension = ".jpeg";
            break;
        case GfxLinkType::NativePng:
            sMediaType = "image/png";
            pExtension = ".png";
            break;
        case GfxLinkType::NativeTif:
            sMediaType = "image/tiff";
            pExtension = ".tif";
            break;
        case GfxLinkType::NativeWmf:
            sMediaType = "image/x-wmf";
            pExtension = ".wmf";
            break;
        case GfxLinkType::NativeMet:
            sMediaType = "image/x-met";
            pExtension = ".met";
            break;
        case GfxLinkType::NativePct:
            sMediaType = "image/x-pict";
            pExtension = ".pct";
            break;
        case GfxLinkType::NativeMov:
            sMediaType = "application/movie";
            pExtension = ".MOV";
            break;
        default:
        switch (aLink.GetType())
        {
            GraphicType aType = rGraphic.GetType();
            if ( aType == GraphicType::Bitmap || aType == GraphicType::GdiMetafile)
            case GfxLinkType::NativeGif:
                sMediaType = "image/gif";
                pExtension = ".gif";
                break;

            // #i15508# added BMP type for better exports
            // export not yet active, so adding for reference (not checked)
            case GfxLinkType::NativeBmp:
                sMediaType = "image/bmp";
                pExtension = ".bmp";
                break;

            case GfxLinkType::NativeJpg:
                sMediaType = "image/jpeg";
                pExtension = ".jpeg";
                break;
            case GfxLinkType::NativePng:
                sMediaType = "image/png";
                pExtension = ".png";
                break;
            case GfxLinkType::NativeTif:
                sMediaType = "image/tiff";
                pExtension = ".tif";
                break;
            case GfxLinkType::NativeWmf:
                sMediaType = "image/x-wmf";
                pExtension = ".wmf";
                break;
            case GfxLinkType::NativeMet:
                sMediaType = "image/x-met";
                pExtension = ".met";
                break;
            case GfxLinkType::NativePct:
                sMediaType = "image/x-pict";
                pExtension = ".pct";
                break;
            case GfxLinkType::NativeMov:
                sMediaType = "application/movie";
                pExtension = ".MOV";
                break;
            default:
            {
                if ( aType == GraphicType::Bitmap )
                GraphicType aType = rGraphic.GetType();
                if (aType == GraphicType::Bitmap || aType == GraphicType::GdiMetafile)
                {
                    (void)GraphicConverter::Export( aStream, rGraphic, ConvertDataFormat::PNG );
                    sMediaType = "image/png";
                    pExtension = ".png";
                    if (aType == GraphicType::Bitmap)
                    {
                        (void)GraphicConverter::Export(aStream, rGraphic, ConvertDataFormat::PNG);
                        sMediaType = "image/png";
                        pExtension = ".png";
                    }
                    else
                    {
                        (void)GraphicConverter::Export(aStream, rGraphic, ConvertDataFormat::EMF);
                        sMediaType = "image/x-emf";
                        pExtension = ".emf";
                    }
                }
                else
                {
                    (void)GraphicConverter::Export( aStream, rGraphic, ConvertDataFormat::EMF );
                    sMediaType = "image/x-emf";
                    pExtension = ".emf";
                    SAL_WARN("oox.shape", "unhandled graphic type " << static_cast<int>(aType));
                    /*Earlier, even in case of unhandled graphic types we were
                      proceeding to write the image, which would eventually
                      write an empty image with a zero size, and return a valid
                      relationID, which is incorrect.
                      */
                    return sRelId;
                }
            }
            else
            {
                SAL_WARN("oox.shape", "unhandled graphic type " << static_cast<int>(aType) );
                /*Earlier, even in case of unhandled graphic types we were
                  proceeding to write the image, which would eventually
                  write an empty image with a zero size, and return a valid
                  relationID, which is incorrect.
                  */
                return sRelId;
            }

            aData = aStream.GetData();
            nDataSize = aStream.GetEndOfData();
            break;
                aData = aStream.GetData();
                nDataSize = aStream.GetEndOfData();
                break;
            }
        }

        Reference<XOutputStream> xOutStream = mpFB->openFragmentStream(
            OUStringBuffer()
                .appendAscii(GetComponentDir())
                .append("/media/image" + OUString::number(mnImageCounter))
                .appendAscii(pExtension)
                .makeStringAndClear(),
            sMediaType);
        xOutStream->writeBytes(Sequence<sal_Int8>(static_cast<const sal_Int8*>(aData), nDataSize));
        xOutStream->closeOutput();

        const OString sRelPathToMedia = "media/image";
        OString sRelationCompPrefix;
        if (bRelPathToMedia)
            sRelationCompPrefix = "../";
        else
            sRelationCompPrefix = GetRelationCompPrefix();
        sPath = OUStringBuffer()
                    .appendAscii(sRelationCompPrefix.getStr())
                    .appendAscii(sRelPathToMedia.getStr())
                    .append(static_cast<sal_Int32>(mnImageCounter++))
                    .appendAscii(pExtension)
                    .makeStringAndClear();

        if (GetDocumentType() != DOCUMENT_DOCX && !maExportGraphics.empty())
            maExportGraphics.top()[aChecksum] = sPath;
    }

    Reference< XOutputStream > xOutStream = mpFB->openFragmentStream( OUStringBuffer()
                                                                      .appendAscii( GetComponentDir() )
                                                                      .append( "/media/image" +
                                                                        OUString::number(mnImageCounter) )
                                                                      .appendAscii( pExtension )
                                                                      .makeStringAndClear(),
                                                                      sMediaType );
    xOutStream->writeBytes( Sequence< sal_Int8 >( static_cast<const sal_Int8*>(aData), nDataSize ) );
    xOutStream->closeOutput();

    const OString sRelPathToMedia = "media/image";
    OString sRelationCompPrefix;
    if ( bRelPathToMedia )
        sRelationCompPrefix = "../";
    else
        sRelationCompPrefix = GetRelationCompPrefix();
    OUString sPath = OUStringBuffer()
                     .appendAscii( sRelationCompPrefix.getStr() )
                     .appendAscii( sRelPathToMedia.getStr() )
                     .append( static_cast<sal_Int32>(mnImageCounter ++) )
                     .appendAscii( pExtension )
                     .makeStringAndClear();
    sRelId = mpFB->addRelation( mpFS->getOutputStream(),
                                oox::getRelationship(Relationship::IMAGE),
                                sPath );
diff --git a/sc/qa/unit/data/ods/tdf91286.ods b/sc/qa/unit/data/ods/tdf91286.ods
new file mode 100644
index 0000000..8bc2605
--- /dev/null
+++ b/sc/qa/unit/data/ods/tdf91286.ods
Binary files differ
diff --git a/sc/qa/unit/subsequent_export_test2.cxx b/sc/qa/unit/subsequent_export_test2.cxx
index 4d22a2f..ee3c141 100644
--- a/sc/qa/unit/subsequent_export_test2.cxx
+++ b/sc/qa/unit/subsequent_export_test2.cxx
@@ -49,6 +49,7 @@
#include <com/sun/star/drawing/XDrawPages.hpp>
#include <com/sun/star/drawing/XDrawPagesSupplier.hpp>
#include <com/sun/star/frame/Desktop.hpp>
#include <com/sun/star/packages/zip/ZipFileAccess.hpp>
#include <com/sun/star/sheet/GlobalSheetSettings.hpp>
#include <com/sun/star/sheet/XHeaderFooterContent.hpp>
#include <com/sun/star/text/XTextColumns.hpp>
@@ -186,6 +187,7 @@ public:
    void testTdf130104_XLSXIndent();
    void testWholeRowBold();
    void testXlsxRowsOrder();
    void testTdf91286();

    CPPUNIT_TEST_SUITE(ScExportTest2);

@@ -305,6 +307,7 @@ public:
    CPPUNIT_TEST(testTdf130104_XLSXIndent);
    CPPUNIT_TEST(testWholeRowBold);
    CPPUNIT_TEST(testXlsxRowsOrder);
    CPPUNIT_TEST(testTdf91286);

    CPPUNIT_TEST_SUITE_END();

@@ -3110,6 +3113,29 @@ void ScExportTest2::testXlsxRowsOrder()
    xDocSh->DoClose();
}

void ScExportTest2::testTdf91286()
{
    ScDocShellRef xDocSh = loadDoc(u"tdf91286.", FORMAT_ODS);
    CPPUNIT_ASSERT(xDocSh.is());
    std::shared_ptr<utl::TempFile> pTemp = exportTo(*xDocSh, FORMAT_XLSX);
    xDocSh->DoClose();

    Reference<packages::zip::XZipFileAccess2> xNameAccess
        = packages::zip::ZipFileAccess::createWithURL(comphelper::getComponentContext(m_xSFactory),
                                                      pTemp->GetURL());
    const Sequence<OUString> aNames(xNameAccess->getElementNames());
    int nImageFiles = 0;
    for (const auto& rElementName : aNames)
        if (rElementName.startsWith("xl/media/image"))
            nImageFiles++;

    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: 1
    // - Actual  : 2
    // i.e. the embedded picture would have been saved twice.
    CPPUNIT_ASSERT_EQUAL(1, nImageFiles);
}

CPPUNIT_TEST_SUITE_REGISTRATION(ScExportTest2);

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/excel/xestream.cxx b/sc/source/filter/excel/xestream.cxx
index 08048e9..b949592 100644
--- a/sc/source/filter/excel/xestream.cxx
+++ b/sc/source/filter/excel/xestream.cxx
@@ -1023,6 +1023,7 @@ bool XclExpXmlStream::exportDocument()
    // Instead, write via XOutputStream instance.
    tools::SvRef<SotStorage> rStorage = static_cast<SotStorage*>(nullptr);
    drawingml::DrawingML::ResetMlCounters();
    drawingml::DrawingML::PushExportGraphics();

    XclExpRootData aData(
        EXC_BIFF8, *pShell->GetMedium (), rStorage, rDoc,
@@ -1124,6 +1125,9 @@ bool XclExpXmlStream::exportDocument()
    if (xStatusIndicator.is())
        xStatusIndicator->end();
    mpRoot = nullptr;

    drawingml::DrawingML::PopExportGraphics();

    return true;
}

diff --git a/sd/qa/unit/data/odp/tdf74670.odp b/sd/qa/unit/data/odp/tdf74670.odp
new file mode 100644
index 0000000..98e223e
--- /dev/null
+++ b/sd/qa/unit/data/odp/tdf74670.odp
Binary files differ
diff --git a/sd/qa/unit/export-tests-ooxml3.cxx b/sd/qa/unit/export-tests-ooxml3.cxx
index 776d436..3f4bec3 100644
--- a/sd/qa/unit/export-tests-ooxml3.cxx
+++ b/sd/qa/unit/export-tests-ooxml3.cxx
@@ -106,6 +106,7 @@ public:
    void testTdf147121();
    void testTdf140912_PicturePlaceholder();
    void testEnhancedPathViewBox();
    void testTdf74670();

    CPPUNIT_TEST_SUITE(SdOOXMLExportTest3);

@@ -182,6 +183,7 @@ public:
    CPPUNIT_TEST(testTdf147121);
    CPPUNIT_TEST(testTdf140912_PicturePlaceholder);
    CPPUNIT_TEST(testEnhancedPathViewBox);
    CPPUNIT_TEST(testTdf74670);
    CPPUNIT_TEST_SUITE_END();

    virtual void registerNamespaces(xmlXPathContextPtr& pXmlXPathCtx) override
@@ -1893,6 +1895,30 @@ void SdOOXMLExportTest3::testEnhancedPathViewBox()
    CPPUNIT_ASSERT_EQUAL(sal_Int32(2045), aBoundRectangle.Width);
}

void SdOOXMLExportTest3::testTdf74670()
{
    ::sd::DrawDocShellRef xDocShRef
        = loadURL(m_directories.getURLFromSrc(u"/sd/qa/unit/data/odp/tdf74670.odp"), ODP);
    utl::TempFile tmpfile;
    xDocShRef = saveAndReload(xDocShRef.get(), PPTX, &tmpfile);
    xDocShRef->DoClose();

    uno::Reference<packages::zip::XZipFileAccess2> xNameAccess
        = packages::zip::ZipFileAccess::createWithURL(comphelper::getComponentContext(m_xSFactory),
                                                      tmpfile.GetURL());
    const uno::Sequence<OUString> aNames(xNameAccess->getElementNames());
    int nImageFiles = 0;
    for (const auto& rElementName : aNames)
        if (rElementName.startsWith("ppt/media/image"))
            nImageFiles++;

    // Without the accompanying fix in place, this test would have failed with:
    // - Expected: 1
    // - Actual  : 2
    // i.e. the embedded picture would have been saved twice.
    CPPUNIT_ASSERT_EQUAL(1, nImageFiles);
}

CPPUNIT_TEST_SUITE_REGISTRATION(SdOOXMLExportTest3);

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sd/source/filter/eppt/pptx-epptooxml.cxx b/sd/source/filter/eppt/pptx-epptooxml.cxx
index 07be652..8823cbd 100644
--- a/sd/source/filter/eppt/pptx-epptooxml.cxx
+++ b/sd/source/filter/eppt/pptx-epptooxml.cxx
@@ -428,6 +428,7 @@ bool PowerPointExport::importDocument() noexcept
bool PowerPointExport::exportDocument()
{
    DrawingML::ResetCounters();
    DrawingML::PushExportGraphics();
    maShapeMap.clear();

    mXModel = getModel();
@@ -504,6 +505,7 @@ bool PowerPointExport::exportDocument()

    commitStorage();

    DrawingML::PopExportGraphics();
    maShapeMap.clear();
    maAuthors.clear();
    maRelId.clear();