tdf#74670 tdf#91286 PPTX XLSX export: save image once
Impress and Calc used to dump the same image file
as many times as it was featured in the document,
resulting redundant, sometimes huge documents.
Note: using only checksum to recognize image duplication
is a regression, because checksum collision results
image loss. This is a very unlikely event, and
the following commits have got the same problem.
The solution is comparing the images with the same
checksum byte for byte.
See also commit b484e9814c66d8d51cea974390963a6944bc9d73
"tdf#83227 oox: reuse RelId in DML/VML export for the same graphic"
and commit 797fef38612fb2fd62d1f6591619b9361e526bca
"tdf#118535 DOCX export: save header image once".
Change-Id: I9f233d521941381746634cf4f9b5991da0dadda9
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/131928
Tested-by: László Németh <nemeth@numbertext.org>
Reviewed-by: László Németh <nemeth@numbertext.org>
diff --git a/include/oox/export/drawingml.hxx b/include/oox/export/drawingml.hxx
index fb125dd..9a7f744 100644
--- a/include/oox/export/drawingml.hxx
+++ b/include/oox/export/drawingml.hxx
@@ -21,7 +21,9 @@
#define INCLUDED_OOX_EXPORT_DRAWINGML_HXX
#include <map>
#include <stack>
#include <string_view>
#include <unordered_map>
#include <vector>
#include <com/sun/star/beans/PropertyState.hpp>
@@ -41,6 +43,7 @@
#include <sax/fshelper.hxx>
#include <svx/msdffdef.hxx>
#include <vcl/checksum.hxx>
#include <vcl/graph.hxx>
#include <tools/gen.hxx>
#include <tools/color.hxx>
#include <vcl/mapmod.hxx>
@@ -150,6 +153,7 @@ private:
static std::map<OUString, OUString> maWdpCache;
static sal_Int32 mnDrawingMLCount;
static sal_Int32 mnVmlCount;
static std::stack<std::unordered_map<BitmapChecksum, OUString>> maExportGraphics;
/// To specify where write eg. the images to (like 'ppt', or 'word' - according to the OPC).
DocumentType meDocumentType;
@@ -342,9 +346,11 @@ public:
sal_Int32 getBulletMarginIndentation (const css::uno::Reference< css::beans::XPropertySet >& rXPropSet,sal_Int16 nLevel, std::u16string_view propName);
static void ResetCounters();
static void ResetMlCounters();
static void PushExportGraphics();
static void PopExportGraphics();
static sal_Int32 getNewDrawingUniqueId() { return ++mnDrawingMLCount; }
static sal_Int32 getNewVMLUniqueId() { return ++mnVmlCount; }
diff --git a/oox/source/export/drawingml.cxx b/oox/source/export/drawingml.cxx
index a790a64..a99a047 100644
--- a/oox/source/export/drawingml.cxx
+++ b/oox/source/export/drawingml.cxx
@@ -111,7 +111,6 @@
#include <tools/stream.hxx>
#include <unotools/fontdefs.hxx>
#include <vcl/cvtgrf.hxx>
#include <vcl/graph.hxx>
#include <vcl/svapp.hxx>
#include <rtl/strbuf.hxx>
#include <filter/msfilter/escherex.hxx>
@@ -237,6 +236,7 @@ int DrawingML::mnWdpImageCounter = 1;
std::map<OUString, OUString> DrawingML::maWdpCache;
sal_Int32 DrawingML::mnDrawingMLCount = 0;
sal_Int32 DrawingML::mnVmlCount = 0;
std::stack<std::unordered_map<BitmapChecksum, OUString>> DrawingML::maExportGraphics;
sal_Int16 DrawingML::GetScriptType(const OUString& rStr)
{
@@ -275,6 +275,16 @@ void DrawingML::ResetMlCounters()
mnVmlCount = 0;
}
void DrawingML::PushExportGraphics()
{
maExportGraphics.emplace();
}
void DrawingML::PopExportGraphics()
{
maExportGraphics.pop();
}
bool DrawingML::GetProperty( const Reference< XPropertySet >& rXPropertySet, const OUString& aName )
{
try
@@ -1264,113 +1274,130 @@ const char* DrawingML::GetRelationCompPrefix() const
OUString DrawingML::WriteImage( const Graphic& rGraphic , bool bRelPathToMedia, OUString* pFileName )
{
GfxLink aLink = rGraphic.GetGfxLink ();
BitmapChecksum aChecksum = rGraphic.GetChecksum();
OUString sMediaType;
const char* pExtension = "";
OUString sRelId;
OUString sPath;
SvMemoryStream aStream;
const void* aData = aLink.GetData();
std::size_t nDataSize = aLink.GetDataSize();
switch ( aLink.GetType() )
// tdf#74670 tdf#91286 Save image only once (this is no problem for DOCX)
if (GetDocumentType() != DOCUMENT_DOCX && !maExportGraphics.empty())
{
case GfxLinkType::NativeGif:
sMediaType = "image/gif";
pExtension = ".gif";
break;
auto aIterator = maExportGraphics.top().find(aChecksum);
if (aIterator != maExportGraphics.top().end())
sPath = aIterator->second;
}
// #i15508# added BMP type for better exports
// export not yet active, so adding for reference (not checked)
case GfxLinkType::NativeBmp:
sMediaType = "image/bmp";
pExtension = ".bmp";
break;
if (sPath.isEmpty())
{
SvMemoryStream aStream;
const void* aData = aLink.GetData();
std::size_t nDataSize = aLink.GetDataSize();
case GfxLinkType::NativeJpg:
sMediaType = "image/jpeg";
pExtension = ".jpeg";
break;
case GfxLinkType::NativePng:
sMediaType = "image/png";
pExtension = ".png";
break;
case GfxLinkType::NativeTif:
sMediaType = "image/tiff";
pExtension = ".tif";
break;
case GfxLinkType::NativeWmf:
sMediaType = "image/x-wmf";
pExtension = ".wmf";
break;
case GfxLinkType::NativeMet:
sMediaType = "image/x-met";
pExtension = ".met";
break;
case GfxLinkType::NativePct:
sMediaType = "image/x-pict";
pExtension = ".pct";
break;
case GfxLinkType::NativeMov:
sMediaType = "application/movie";
pExtension = ".MOV";
break;
default:
switch (aLink.GetType())
{
GraphicType aType = rGraphic.GetType();
if ( aType == GraphicType::Bitmap || aType == GraphicType::GdiMetafile)
case GfxLinkType::NativeGif:
sMediaType = "image/gif";
pExtension = ".gif";
break;
// #i15508# added BMP type for better exports
// export not yet active, so adding for reference (not checked)
case GfxLinkType::NativeBmp:
sMediaType = "image/bmp";
pExtension = ".bmp";
break;
case GfxLinkType::NativeJpg:
sMediaType = "image/jpeg";
pExtension = ".jpeg";
break;
case GfxLinkType::NativePng:
sMediaType = "image/png";
pExtension = ".png";
break;
case GfxLinkType::NativeTif:
sMediaType = "image/tiff";
pExtension = ".tif";
break;
case GfxLinkType::NativeWmf:
sMediaType = "image/x-wmf";
pExtension = ".wmf";
break;
case GfxLinkType::NativeMet:
sMediaType = "image/x-met";
pExtension = ".met";
break;
case GfxLinkType::NativePct:
sMediaType = "image/x-pict";
pExtension = ".pct";
break;
case GfxLinkType::NativeMov:
sMediaType = "application/movie";
pExtension = ".MOV";
break;
default:
{
if ( aType == GraphicType::Bitmap )
GraphicType aType = rGraphic.GetType();
if (aType == GraphicType::Bitmap || aType == GraphicType::GdiMetafile)
{
(void)GraphicConverter::Export( aStream, rGraphic, ConvertDataFormat::PNG );
sMediaType = "image/png";
pExtension = ".png";
if (aType == GraphicType::Bitmap)
{
(void)GraphicConverter::Export(aStream, rGraphic, ConvertDataFormat::PNG);
sMediaType = "image/png";
pExtension = ".png";
}
else
{
(void)GraphicConverter::Export(aStream, rGraphic, ConvertDataFormat::EMF);
sMediaType = "image/x-emf";
pExtension = ".emf";
}
}
else
{
(void)GraphicConverter::Export( aStream, rGraphic, ConvertDataFormat::EMF );
sMediaType = "image/x-emf";
pExtension = ".emf";
SAL_WARN("oox.shape", "unhandled graphic type " << static_cast<int>(aType));
/*Earlier, even in case of unhandled graphic types we were
proceeding to write the image, which would eventually
write an empty image with a zero size, and return a valid
relationID, which is incorrect.
*/
return sRelId;
}
}
else
{
SAL_WARN("oox.shape", "unhandled graphic type " << static_cast<int>(aType) );
/*Earlier, even in case of unhandled graphic types we were
proceeding to write the image, which would eventually
write an empty image with a zero size, and return a valid
relationID, which is incorrect.
*/
return sRelId;
}
aData = aStream.GetData();
nDataSize = aStream.GetEndOfData();
break;
aData = aStream.GetData();
nDataSize = aStream.GetEndOfData();
break;
}
}
Reference<XOutputStream> xOutStream = mpFB->openFragmentStream(
OUStringBuffer()
.appendAscii(GetComponentDir())
.append("/media/image" + OUString::number(mnImageCounter))
.appendAscii(pExtension)
.makeStringAndClear(),
sMediaType);
xOutStream->writeBytes(Sequence<sal_Int8>(static_cast<const sal_Int8*>(aData), nDataSize));
xOutStream->closeOutput();
const OString sRelPathToMedia = "media/image";
OString sRelationCompPrefix;
if (bRelPathToMedia)
sRelationCompPrefix = "../";
else
sRelationCompPrefix = GetRelationCompPrefix();
sPath = OUStringBuffer()
.appendAscii(sRelationCompPrefix.getStr())
.appendAscii(sRelPathToMedia.getStr())
.append(static_cast<sal_Int32>(mnImageCounter++))
.appendAscii(pExtension)
.makeStringAndClear();
if (GetDocumentType() != DOCUMENT_DOCX && !maExportGraphics.empty())
maExportGraphics.top()[aChecksum] = sPath;
}
Reference< XOutputStream > xOutStream = mpFB->openFragmentStream( OUStringBuffer()
.appendAscii( GetComponentDir() )
.append( "/media/image" +
OUString::number(mnImageCounter) )
.appendAscii( pExtension )
.makeStringAndClear(),
sMediaType );
xOutStream->writeBytes( Sequence< sal_Int8 >( static_cast<const sal_Int8*>(aData), nDataSize ) );
xOutStream->closeOutput();
const OString sRelPathToMedia = "media/image";
OString sRelationCompPrefix;
if ( bRelPathToMedia )
sRelationCompPrefix = "../";
else
sRelationCompPrefix = GetRelationCompPrefix();
OUString sPath = OUStringBuffer()
.appendAscii( sRelationCompPrefix.getStr() )
.appendAscii( sRelPathToMedia.getStr() )
.append( static_cast<sal_Int32>(mnImageCounter ++) )
.appendAscii( pExtension )
.makeStringAndClear();
sRelId = mpFB->addRelation( mpFS->getOutputStream(),
oox::getRelationship(Relationship::IMAGE),
sPath );
diff --git a/sc/qa/unit/data/ods/tdf91286.ods b/sc/qa/unit/data/ods/tdf91286.ods
new file mode 100644
index 0000000..8bc2605
--- /dev/null
+++ b/sc/qa/unit/data/ods/tdf91286.ods
Binary files differ
diff --git a/sc/qa/unit/subsequent_export_test2.cxx b/sc/qa/unit/subsequent_export_test2.cxx
index 4d22a2f..ee3c141 100644
--- a/sc/qa/unit/subsequent_export_test2.cxx
+++ b/sc/qa/unit/subsequent_export_test2.cxx
@@ -49,6 +49,7 @@
#include <com/sun/star/drawing/XDrawPages.hpp>
#include <com/sun/star/drawing/XDrawPagesSupplier.hpp>
#include <com/sun/star/frame/Desktop.hpp>
#include <com/sun/star/packages/zip/ZipFileAccess.hpp>
#include <com/sun/star/sheet/GlobalSheetSettings.hpp>
#include <com/sun/star/sheet/XHeaderFooterContent.hpp>
#include <com/sun/star/text/XTextColumns.hpp>
@@ -186,6 +187,7 @@ public:
void testTdf130104_XLSXIndent();
void testWholeRowBold();
void testXlsxRowsOrder();
void testTdf91286();
CPPUNIT_TEST_SUITE(ScExportTest2);
@@ -305,6 +307,7 @@ public:
CPPUNIT_TEST(testTdf130104_XLSXIndent);
CPPUNIT_TEST(testWholeRowBold);
CPPUNIT_TEST(testXlsxRowsOrder);
CPPUNIT_TEST(testTdf91286);
CPPUNIT_TEST_SUITE_END();
@@ -3110,6 +3113,29 @@ void ScExportTest2::testXlsxRowsOrder()
xDocSh->DoClose();
}
void ScExportTest2::testTdf91286()
{
ScDocShellRef xDocSh = loadDoc(u"tdf91286.", FORMAT_ODS);
CPPUNIT_ASSERT(xDocSh.is());
std::shared_ptr<utl::TempFile> pTemp = exportTo(*xDocSh, FORMAT_XLSX);
xDocSh->DoClose();
Reference<packages::zip::XZipFileAccess2> xNameAccess
= packages::zip::ZipFileAccess::createWithURL(comphelper::getComponentContext(m_xSFactory),
pTemp->GetURL());
const Sequence<OUString> aNames(xNameAccess->getElementNames());
int nImageFiles = 0;
for (const auto& rElementName : aNames)
if (rElementName.startsWith("xl/media/image"))
nImageFiles++;
// Without the accompanying fix in place, this test would have failed with:
// - Expected: 1
// - Actual : 2
// i.e. the embedded picture would have been saved twice.
CPPUNIT_ASSERT_EQUAL(1, nImageFiles);
}
CPPUNIT_TEST_SUITE_REGISTRATION(ScExportTest2);
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sc/source/filter/excel/xestream.cxx b/sc/source/filter/excel/xestream.cxx
index 08048e9..b949592 100644
--- a/sc/source/filter/excel/xestream.cxx
+++ b/sc/source/filter/excel/xestream.cxx
@@ -1023,6 +1023,7 @@ bool XclExpXmlStream::exportDocument()
// Instead, write via XOutputStream instance.
tools::SvRef<SotStorage> rStorage = static_cast<SotStorage*>(nullptr);
drawingml::DrawingML::ResetMlCounters();
drawingml::DrawingML::PushExportGraphics();
XclExpRootData aData(
EXC_BIFF8, *pShell->GetMedium (), rStorage, rDoc,
@@ -1124,6 +1125,9 @@ bool XclExpXmlStream::exportDocument()
if (xStatusIndicator.is())
xStatusIndicator->end();
mpRoot = nullptr;
drawingml::DrawingML::PopExportGraphics();
return true;
}
diff --git a/sd/qa/unit/data/odp/tdf74670.odp b/sd/qa/unit/data/odp/tdf74670.odp
new file mode 100644
index 0000000..98e223e
--- /dev/null
+++ b/sd/qa/unit/data/odp/tdf74670.odp
Binary files differ
diff --git a/sd/qa/unit/export-tests-ooxml3.cxx b/sd/qa/unit/export-tests-ooxml3.cxx
index 776d436..3f4bec3 100644
--- a/sd/qa/unit/export-tests-ooxml3.cxx
+++ b/sd/qa/unit/export-tests-ooxml3.cxx
@@ -106,6 +106,7 @@ public:
void testTdf147121();
void testTdf140912_PicturePlaceholder();
void testEnhancedPathViewBox();
void testTdf74670();
CPPUNIT_TEST_SUITE(SdOOXMLExportTest3);
@@ -182,6 +183,7 @@ public:
CPPUNIT_TEST(testTdf147121);
CPPUNIT_TEST(testTdf140912_PicturePlaceholder);
CPPUNIT_TEST(testEnhancedPathViewBox);
CPPUNIT_TEST(testTdf74670);
CPPUNIT_TEST_SUITE_END();
virtual void registerNamespaces(xmlXPathContextPtr& pXmlXPathCtx) override
@@ -1893,6 +1895,30 @@ void SdOOXMLExportTest3::testEnhancedPathViewBox()
CPPUNIT_ASSERT_EQUAL(sal_Int32(2045), aBoundRectangle.Width);
}
void SdOOXMLExportTest3::testTdf74670()
{
::sd::DrawDocShellRef xDocShRef
= loadURL(m_directories.getURLFromSrc(u"/sd/qa/unit/data/odp/tdf74670.odp"), ODP);
utl::TempFile tmpfile;
xDocShRef = saveAndReload(xDocShRef.get(), PPTX, &tmpfile);
xDocShRef->DoClose();
uno::Reference<packages::zip::XZipFileAccess2> xNameAccess
= packages::zip::ZipFileAccess::createWithURL(comphelper::getComponentContext(m_xSFactory),
tmpfile.GetURL());
const uno::Sequence<OUString> aNames(xNameAccess->getElementNames());
int nImageFiles = 0;
for (const auto& rElementName : aNames)
if (rElementName.startsWith("ppt/media/image"))
nImageFiles++;
// Without the accompanying fix in place, this test would have failed with:
// - Expected: 1
// - Actual : 2
// i.e. the embedded picture would have been saved twice.
CPPUNIT_ASSERT_EQUAL(1, nImageFiles);
}
CPPUNIT_TEST_SUITE_REGISTRATION(SdOOXMLExportTest3);
CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/sd/source/filter/eppt/pptx-epptooxml.cxx b/sd/source/filter/eppt/pptx-epptooxml.cxx
index 07be652..8823cbd 100644
--- a/sd/source/filter/eppt/pptx-epptooxml.cxx
+++ b/sd/source/filter/eppt/pptx-epptooxml.cxx
@@ -428,6 +428,7 @@ bool PowerPointExport::importDocument() noexcept
bool PowerPointExport::exportDocument()
{
DrawingML::ResetCounters();
DrawingML::PushExportGraphics();
maShapeMap.clear();
mXModel = getModel();
@@ -504,6 +505,7 @@ bool PowerPointExport::exportDocument()
commitStorage();
DrawingML::PopExportGraphics();
maShapeMap.clear();
maAuthors.clear();
maRelId.clear();