tdf#107018 PDF export of PDF images: handle references in nested dictionaries
Also get rid of the GetKeyOffset() and GetKeyValueLength() calls when
copying dictionaries: the reference already knows its offset and length,
so no need to call them. This makes the dictionary and the array
handling more similar.
Change-Id: I65936acfaf857636a8d83da3a4cec69289eb89d8
Reviewed-on: https://gerrit.libreoffice.org/36282
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
Tested-by: Jenkins <ci@libreoffice.org>
diff --git a/include/vcl/filter/pdfdocument.hxx b/include/vcl/filter/pdfdocument.hxx
index 595b4f0..d83cb83 100644
--- a/include/vcl/filter/pdfdocument.hxx
+++ b/include/vcl/filter/pdfdocument.hxx
@@ -71,6 +71,9 @@ class VCL_DLLPUBLIC PDFObjectElement : public PDFElement
std::vector< std::unique_ptr<PDFElement> > m_aElements;
/// Uncompressed buffer of an object in an object stream.
std::unique_ptr<SvMemoryStream> m_pStreamBuffer;
/// List of all reference elements inside this object's dictionary and
/// nested dictionaries.
std::vector<PDFReferenceElement*> m_aDictionaryReferences;
public:
PDFObjectElement(PDFDocument& rDoc, double fObjectValue, double fGenerationValue);
@@ -88,8 +91,8 @@ public:
PDFNumberElement* GetNumberElement() const;
/// Get access to the parsed key-value items from the object dictionary.
const std::map<OString, PDFElement*>& GetDictionaryItems();
/// Same as GetDictionaryItems(), but entries are sorted by file offset.
std::vector< std::pair<OString, PDFElement*> > GetDictionaryItemsByOffset();
/// Get the reference elements registered via AddDictionaryReference(), in
/// the order they were added.
const std::vector<PDFReferenceElement*>& GetDictionaryReferences() const;
/// Register a reference element found inside this object's dictionary or one
/// of its nested dictionaries. The pointer is stored as-is.
void AddDictionaryReference(PDFReferenceElement* pReference);
void SetArray(PDFArrayElement* pArrayElement);
void SetStream(PDFStreamElement* pStreamElement);
/// Access to the stream of the object, if it has any.
diff --git a/vcl/qa/cppunit/pdfexport/data/tdf107018.odt b/vcl/qa/cppunit/pdfexport/data/tdf107018.odt
new file mode 100644
index 0000000..3bfc7b2
--- /dev/null
+++ b/vcl/qa/cppunit/pdfexport/data/tdf107018.odt
Binary files differ
diff --git a/vcl/qa/cppunit/pdfexport/pdfexport.cxx b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
index 31d0dfb..aacf36b 100644
--- a/vcl/qa/cppunit/pdfexport/pdfexport.cxx
+++ b/vcl/qa/cppunit/pdfexport/pdfexport.cxx
@@ -53,6 +53,7 @@ public:
void testTdf106972();
void testTdf106972Pdf17();
void testTdf107013();
void testTdf107018();
#endif
CPPUNIT_TEST_SUITE(PdfExportTest);
@@ -65,6 +66,7 @@ public:
CPPUNIT_TEST(testTdf106972);
CPPUNIT_TEST(testTdf106972Pdf17);
CPPUNIT_TEST(testTdf107013);
CPPUNIT_TEST(testTdf107018);
#endif
CPPUNIT_TEST_SUITE_END();
};
@@ -402,6 +404,54 @@ void PdfExportTest::testTdf107013()
// This failed, the reference to the image was created, but not the image.
CPPUNIT_ASSERT(pXObject);
}
/// Regression test for tdf#107018: when the page stream of a PDF image is
/// copied during PDF export, references that live in nested dictionaries must
/// be updated as well. Walks image -> form -> Resources -> Font -> F1 ->
/// Foo -> Bar and checks the type of the object Bar finally points to.
void PdfExportTest::testTdf107018()
{
    vcl::filter::PDFDocument aDocument;
    load("tdf107018.odt", aDocument);

    // Get access to the only image on the only page.
    std::vector<vcl::filter::PDFObjectElement*> aPages = aDocument.GetPages();
    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), aPages.size());
    vcl::filter::PDFObjectElement* pResources = aPages[0]->LookupObject("Resources");
    CPPUNIT_ASSERT(pResources);
    auto pXObjects = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pResources->Lookup("XObject"));
    CPPUNIT_ASSERT(pXObjects);
    CPPUNIT_ASSERT_EQUAL(static_cast<size_t>(1), pXObjects->GetItems().size());
    vcl::filter::PDFObjectElement* pXObject = pXObjects->LookupObject(pXObjects->GetItems().begin()->first);
    CPPUNIT_ASSERT(pXObject);

    // Get access to the form object inside the image.
    auto pXObjectResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObject->Lookup("Resources"));
    CPPUNIT_ASSERT(pXObjectResources);
    auto pXObjectForms = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pXObjectResources->LookupElement("XObject"));
    CPPUNIT_ASSERT(pXObjectForms);
    // Fail cleanly instead of dereferencing begin() of an empty container,
    // which would be undefined behaviour.
    CPPUNIT_ASSERT(!pXObjectForms->GetItems().empty());
    vcl::filter::PDFObjectElement* pForm = pXObjectForms->LookupObject(pXObjectForms->GetItems().begin()->first);
    CPPUNIT_ASSERT(pForm);

    // Get access to Resources -> Font -> F1 of the form.
    auto pFormResources = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pForm->Lookup("Resources"));
    CPPUNIT_ASSERT(pFormResources);
    auto pFonts = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pFormResources->LookupElement("Font"));
    CPPUNIT_ASSERT(pFonts);
    auto pF1Ref = dynamic_cast<vcl::filter::PDFReferenceElement*>(pFonts->LookupElement("F1"));
    CPPUNIT_ASSERT(pF1Ref);
    vcl::filter::PDFObjectElement* pF1 = pF1Ref->LookupObject();
    CPPUNIT_ASSERT(pF1);

    // Check that Foo -> Bar of the font is of type Pages.
    auto pFontFoo = dynamic_cast<vcl::filter::PDFDictionaryElement*>(pF1->Lookup("Foo"));
    CPPUNIT_ASSERT(pFontFoo);
    auto pBar = dynamic_cast<vcl::filter::PDFReferenceElement*>(pFontFoo->LookupElement("Bar"));
    CPPUNIT_ASSERT(pBar);
    vcl::filter::PDFObjectElement* pObject = pBar->LookupObject();
    CPPUNIT_ASSERT(pObject);
    auto pName = dynamic_cast<vcl::filter::PDFNameElement*>(pObject->Lookup("Type"));
    CPPUNIT_ASSERT(pName);
    // This was "XObject", reference in a nested dictionary wasn't updated when
    // copying the page stream of a PDF image.
    CPPUNIT_ASSERT_EQUAL(OString("Pages"), pName->GetValue());
}
#endif
CPPUNIT_TEST_SUITE_REGISTRATION(PdfExportTest);
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 43d4248..b0bb8be 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -1071,10 +1071,14 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
}
else
{
rElements.push_back(std::unique_ptr<PDFElement>(new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber)));
auto pReference = new PDFReferenceElement(*this, *pObjectNumber, *pGenerationNumber);
rElements.push_back(std::unique_ptr<PDFElement>(pReference));
if (pArray)
// Reference is part of a direct (non-dictionary) array, inform the array.
pArray->PushBack(rElements.back().get());
if (bInObject && nDictionaryDepth > 0 && pObject)
// Inform the object about a new in-dictionary reference.
pObject->AddDictionaryReference(pReference);
}
if (!rElements.back()->Read(rStream))
{
@@ -2512,23 +2516,14 @@ PDFNumberElement* PDFObjectElement::GetNumberElement() const
return m_pNumberElement;
}
std::vector< std::pair<OString, PDFElement*> > PDFObjectElement::GetDictionaryItemsByOffset()
const std::vector<PDFReferenceElement*>& PDFObjectElement::GetDictionaryReferences() const
{
std::vector< std::pair<OString, PDFElement*> > aRet;
return m_aDictionaryReferences;
}
for (const auto& rItem : m_aDictionary)
aRet.push_back(rItem);
PDFDictionaryElement* pDictionary = GetDictionary();
if (!pDictionary)
return aRet;
std::sort(aRet.begin(), aRet.end(), [pDictionary](const std::pair<OString, PDFElement*>& a, const std::pair<OString, PDFElement*>& b) -> bool
{
return pDictionary->GetKeyOffset(a.first) < pDictionary->GetKeyOffset(b.first);
});
return aRet;
/// Appends pReference to the list of references seen inside this object's
/// dictionary (including nested dictionaries). The pointer is stored as-is.
void PDFObjectElement::AddDictionaryReference(PDFReferenceElement* pReference)
{
    m_aDictionaryReferences.emplace_back(pReference);
}
const std::map<OString, PDFElement*>& PDFObjectElement::GetDictionaryItems()
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 8445377..d5c1f6e 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -10899,17 +10899,15 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
OStringBuffer aLine;
aLine.append(nObject);
aLine.append(" 0 obj\n");
if (filter::PDFDictionaryElement* pDictionary = rObject.GetDictionary())
if (rObject.GetDictionary())
{
aLine.append("<<");
// Complex case: can't copy the dictionary byte array as is, as it may contain references.
bool bDone = false;
std::vector< std::pair<OString, filter::PDFElement*> > aItems = rObject.GetDictionaryItemsByOffset();
sal_uInt64 nCopyStart = 0;
for (const auto& rItem : aItems)
for (auto pReference : rObject.GetDictionaryReferences())
{
auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
if (pReference)
{
filter::PDFObjectElement* pReferenced = pReference->LookupObject();
@@ -10918,8 +10916,8 @@ sal_Int32 PDFWriterImpl::copyExternalResource(SvMemoryStream& rDocBuffer, filter
// Copy the referenced object.
sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
sal_uInt64 nReferenceStart = pDictionary->GetKeyOffset(rItem.first) + rItem.first.getLength();
sal_uInt64 nReferenceEnd = pDictionary->GetKeyOffset(rItem.first) + pDictionary->GetKeyValueLength(rItem.first);
sal_uInt64 nReferenceStart = pReference->GetObjectElement().GetLocation();
sal_uInt64 nReferenceEnd = pReference->GetOffset();
sal_uInt64 nOffset = 0;
if (nCopyStart == 0)
// Dict start -> reference start.