pdf: extract XMP metadata writing and use XmlWriter
Write the XMP metadata with XmlWriter instead of assembling it in a
string buffer. Extract the XMP metadata writing into its own
class vcl::pdf::XmpMetadata.
This also needs a change to the XmlWriter to not write a classic
XML header: '<?xml version="1.0" ... ?>'
Change-Id: I95ea0e7ba58e7c43a0c707bf9c676994210ff104
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/85908
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Tested-by: Tomaž Vajngerl <quikee@gmail.com>
diff --git a/include/tools/XmlWriter.hxx b/include/tools/XmlWriter.hxx
index da056c6..7efe3a5 100644
--- a/include/tools/XmlWriter.hxx
+++ b/include/tools/XmlWriter.hxx
@@ -40,7 +40,7 @@ public:
~XmlWriter();
bool startDocument(sal_Int32 nIndent = 2);
bool startDocument(sal_Int32 nIndent = 2, bool bWriteXmlHeader = true);
void endDocument();
void startElement(const OString& sName);
diff --git a/tools/source/xml/XmlWriter.cxx b/tools/source/xml/XmlWriter.cxx
index 3400a6e..a314eed 100644
--- a/tools/source/xml/XmlWriter.cxx
+++ b/tools/source/xml/XmlWriter.cxx
@@ -36,11 +36,13 @@ struct XmlWriterImpl
XmlWriterImpl(SvStream* pStream)
: mpStream(pStream)
, mpWriter(nullptr)
, mbWriteXmlHeader(true)
{
}
SvStream* const mpStream;
xmlTextWriterPtr mpWriter;
bool mbWriteXmlHeader;
};
XmlWriter::XmlWriter(SvStream* pStream)
@@ -54,21 +56,24 @@ XmlWriter::~XmlWriter()
endDocument();
}
bool XmlWriter::startDocument(sal_Int32 nIndent)
bool XmlWriter::startDocument(sal_Int32 nIndent, bool bWriteXmlHeader)
{
mpImpl->mbWriteXmlHeader = bWriteXmlHeader;
xmlOutputBufferPtr xmlOutBuffer
= xmlOutputBufferCreateIO(funcWriteCallback, funcCloseCallback, mpImpl->mpStream, nullptr);
mpImpl->mpWriter = xmlNewTextWriter(xmlOutBuffer);
if (mpImpl->mpWriter == nullptr)
return false;
xmlTextWriterSetIndent(mpImpl->mpWriter, nIndent);
xmlTextWriterStartDocument(mpImpl->mpWriter, nullptr, "UTF-8", nullptr);
if (mpImpl->mbWriteXmlHeader)
xmlTextWriterStartDocument(mpImpl->mpWriter, nullptr, "UTF-8", nullptr);
return true;
}
void XmlWriter::endDocument()
{
xmlTextWriterEndDocument(mpImpl->mpWriter);
if (mpImpl->mbWriteXmlHeader)
xmlTextWriterEndDocument(mpImpl->mpWriter);
xmlFreeTextWriter(mpImpl->mpWriter);
mpImpl->mpWriter = nullptr;
}
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index b401d81..0bc2715 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -448,6 +448,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
vcl/source/fontsubset/sft \
vcl/source/fontsubset/ttcr \
vcl/source/fontsubset/xlat \
vcl/source/pdf/XmpMetadata \
vcl/source/uitest/logger \
vcl/source/uitest/uiobject \
vcl/source/uitest/uitest \
diff --git a/vcl/inc/pdf/XmpMetadata.hxx b/vcl/inc/pdf/XmpMetadata.hxx
new file mode 100644
index 0000000..d9f9cac
--- /dev/null
+++ b/vcl/inc/pdf/XmpMetadata.hxx
@@ -0,0 +1,47 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
*/
#ifndef INCLUDED_VCL_INC_PDF_XMPMETADATA_HXX
#define INCLUDED_VCL_INC_PDF_XMPMETADATA_HXX
#include <vcl/dllapi.h>
#include <rtl/string.hxx>
#include <tools/stream.hxx>
#include <memory>
namespace vcl::pdf
{
/**
 * Builds the XMP metadata packet that is embedded into a PDF file.
 *
 * Fill in the public fields first, then call getSize() / getData(). The
 * packet is serialised into an internal memory stream on the first of
 * those calls and is not regenerated afterwards, so changes made to the
 * fields after that point are not reflected in the output.
 */
class XmpMetadata
{
    // Set once write() has filled mpMemoryStream.
    bool mbWritten;
    // Holds the serialised packet; created by write().
    std::unique_ptr<SvMemoryStream> mpMemoryStream;

public:
    // Written as dc:title (skipped when empty).
    OString msTitle;
    // Written as dc:creator (skipped when empty).
    OString msAuthor;
    // Written as dc:description (skipped when empty).
    OString msSubject;
    // Written as pdf:Producer (skipped when empty).
    OString msProducer;
    // Written as pdf:Keywords (skipped when empty).
    OString msKeywords;
    // PDF/A part number written as pdfaid:part. Values <= 0 suppress the
    // PDF/A description; 1 is written with conformance "A", any other
    // positive value with conformance "B".
    sal_Int32 mnPDF_A;

    XmpMetadata();

    // Size of the serialised packet as reported by the internal stream.
    sal_uInt64 getSize();
    // Pointer to the serialised packet held by the internal stream.
    const void* getData();

private:
    // Serialises the fields above into mpMemoryStream.
    void write();
};
}
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index f5fb555..891d5db 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -74,6 +74,7 @@
#include <textlineinfo.hxx>
#include <bitmapwriteaccess.hxx>
#include <impglyphitem.hxx>
#include <pdf/XmpMetadata.hxx>
#include "pdfwriter_impl.hxx"
@@ -5232,131 +5233,43 @@ sal_Int32 PDFWriterImpl::emitDocumentMetadata()
if( updateObject( nObject ) )
{
// the following string are written in UTF-8 unicode
OStringBuffer aMetadataStream( 8192 );
pdf::XmpMetadata aMetadata;
aMetadataStream.append( "<?xpacket begin=\"" );
// these lines write Unicode "zero width non-breaking space character" (U+FEFF)
// (aka byte-order mark ) used as a byte-order marker.
aMetadataStream.append( OUStringToOString( OUString( u'\xFEFF' ), RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" );
aMetadataStream.append( "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n" );
aMetadataStream.append( " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" );
//PDF/A part ( ISO 19005-1:2005 - 6.7.11 )
aMetadataStream.append( " <rdf:Description rdf:about=\"\"\n" );
aMetadataStream.append( " xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n" );
if( m_bIsPDF_A2 )
{
aMetadataStream.append( " <pdfaid:part>2</pdfaid:part>\n" );
aMetadataStream.append( " <pdfaid:conformance>B</pdfaid:conformance>\n" );
}
else
{
aMetadataStream.append( " <pdfaid:part>1</pdfaid:part>\n" );
aMetadataStream.append( " <pdfaid:conformance>A</pdfaid:conformance>\n" );
}
aMetadataStream.append( " </rdf:Description>\n" );
//... Dublin Core properties go here
if( !m_aContext.DocumentInfo.Title.isEmpty() ||
!m_aContext.DocumentInfo.Author.isEmpty() ||
!m_aContext.DocumentInfo.Subject.isEmpty() )
{
aMetadataStream.append( " <rdf:Description rdf:about=\"\"\n" );
aMetadataStream.append( " xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n" );
if( !m_aContext.DocumentInfo.Title.isEmpty() )
{
// this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
aMetadataStream.append( " <dc:title>\n" );
aMetadataStream.append( " <rdf:Alt>\n" );
aMetadataStream.append( " <rdf:li xml:lang=\"x-default\">" );
OUString aTitle;
escapeStringXML( m_aContext.DocumentInfo.Title, aTitle );
aMetadataStream.append( OUStringToOString( aTitle, RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "</rdf:li>\n" );
aMetadataStream.append( " </rdf:Alt>\n" );
aMetadataStream.append( " </dc:title>\n" );
}
if( !m_aContext.DocumentInfo.Author.isEmpty() )
{
aMetadataStream.append( " <dc:creator>\n" );
aMetadataStream.append( " <rdf:Seq>\n" );
aMetadataStream.append( " <rdf:li>" );
OUString aAuthor;
escapeStringXML( m_aContext.DocumentInfo.Author, aAuthor );
aMetadataStream.append( OUStringToOString( aAuthor , RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "</rdf:li>\n" );
aMetadataStream.append( " </rdf:Seq>\n" );
aMetadataStream.append( " </dc:creator>\n" );
}
if( !m_aContext.DocumentInfo.Subject.isEmpty() )
{
// this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
aMetadataStream.append( " <dc:description>\n" );
aMetadataStream.append( " <rdf:Alt>\n" );
aMetadataStream.append( " <rdf:li xml:lang=\"x-default\">" );
OUString aSubject;
escapeStringXML( m_aContext.DocumentInfo.Subject, aSubject );
aMetadataStream.append( OUStringToOString( aSubject , RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "</rdf:li>\n" );
aMetadataStream.append( " </rdf:Alt>\n" );
aMetadataStream.append( " </dc:description>\n" );
}
aMetadataStream.append( " </rdf:Description>\n" );
}
if (m_bIsPDF_A1)
aMetadata.mnPDF_A = 1;
else if (m_bIsPDF_A2)
aMetadata.mnPDF_A = 2;
//... PDF properties go here
if( !m_aContext.DocumentInfo.Producer.isEmpty() ||
!m_aContext.DocumentInfo.Keywords.isEmpty() )
if (!m_aContext.DocumentInfo.Title.isEmpty())
{
aMetadataStream.append( " <rdf:Description rdf:about=\"\"\n" );
aMetadataStream.append( " xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">\n" );
if( !m_aContext.DocumentInfo.Producer.isEmpty() )
{
aMetadataStream.append( " <pdf:Producer>" );
OUString aProducer;
escapeStringXML( m_aContext.DocumentInfo.Producer, aProducer );
aMetadataStream.append( OUStringToOString( aProducer , RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "</pdf:Producer>\n" );
}
if( !m_aContext.DocumentInfo.Keywords.isEmpty() )
{
aMetadataStream.append( " <pdf:Keywords>" );
OUString aKeywords;
escapeStringXML( m_aContext.DocumentInfo.Keywords, aKeywords );
aMetadataStream.append( OUStringToOString( aKeywords , RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "</pdf:Keywords>\n" );
}
aMetadataStream.append( " </rdf:Description>\n" );
OUString aTempString;
escapeStringXML(m_aContext.DocumentInfo.Title, aTempString);
aMetadata.msTitle = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
}
aMetadataStream.append( " <rdf:Description rdf:about=\"\"\n" );
aMetadataStream.append( " xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n" );
if( !m_aContext.DocumentInfo.Creator.isEmpty() )
if (!m_aContext.DocumentInfo.Author.isEmpty())
{
aMetadataStream.append( " <xmp:CreatorTool>" );
OUString aCreator;
escapeStringXML( m_aContext.DocumentInfo.Creator, aCreator );
aMetadataStream.append( OUStringToOString( aCreator , RTL_TEXTENCODING_UTF8 ) );
aMetadataStream.append( "</xmp:CreatorTool>\n" );
OUString aTempString;
escapeStringXML(m_aContext.DocumentInfo.Author, aTempString);
aMetadata.msAuthor = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
}
//creation date
aMetadataStream.append( " <xmp:CreateDate>" );
aMetadataStream.append( m_aCreationMetaDateString );
aMetadataStream.append( "</xmp:CreateDate>\n" );
aMetadataStream.append( " </rdf:Description>\n" );
aMetadataStream.append( " </rdf:RDF>\n" );
aMetadataStream.append( "</x:xmpmeta>\n" );
//add the padding
for( sal_Int32 nSpaces = 1; nSpaces <= 2100; nSpaces++ )
if (!m_aContext.DocumentInfo.Subject.isEmpty())
{
aMetadataStream.append( " " );
if( nSpaces % 100 == 0 )
aMetadataStream.append( "\n" );
OUString aTempString;
escapeStringXML(m_aContext.DocumentInfo.Subject, aTempString);
aMetadata.msSubject = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
}
aMetadataStream.append( "<?xpacket end=\"w\"?>\n" );
if (!m_aContext.DocumentInfo.Producer.isEmpty())
{
OUString aTempString;
escapeStringXML(m_aContext.DocumentInfo.Producer, aTempString);
aMetadata.msProducer = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
}
if (!m_aContext.DocumentInfo.Keywords.isEmpty())
{
OUString aTempString;
escapeStringXML(m_aContext.DocumentInfo.Keywords, aTempString);
aMetadata.msKeywords = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
}
OStringBuffer aMetadataObj( 1024 );
@@ -5365,12 +5278,12 @@ sal_Int32 PDFWriterImpl::emitDocumentMetadata()
aMetadataObj.append( "<</Type/Metadata/Subtype/XML/Length " );
aMetadataObj.append( aMetadataStream.getLength() );
aMetadataObj.append( sal_Int32(aMetadata.getSize()) );
aMetadataObj.append( ">>\nstream\n" );
if ( !writeBuffer( aMetadataObj.getStr(), aMetadataObj.getLength() ) )
return 0;
//emit the stream
if ( !writeBuffer( aMetadataStream.getStr(), aMetadataStream.getLength() ) )
if ( !writeBuffer( aMetadata.getData(), aMetadata.getSize() ) )
return 0;
aMetadataObj.setLength( 0 );
diff --git a/vcl/source/pdf/XmpMetadata.cxx b/vcl/source/pdf/XmpMetadata.cxx
new file mode 100644
index 0000000..d9033f4
--- /dev/null
+++ b/vcl/source/pdf/XmpMetadata.cxx
@@ -0,0 +1,159 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
*/
#include <pdf/XmpMetadata.hxx>
#include <tools/XmlWriter.hxx>
namespace vcl::pdf
{
namespace
{
// One line of XMP packet padding: 100 spaces followed by a newline.
// XmpMetadata::write() emits this line 21 times, giving the same 2100
// spaces (with a line break after every 100) that the previous
// string-buffer based writer appended, so that the metadata can later be
// edited in place without resizing the packet.
constexpr const char* constPadding = "                    "
                                     "                    "
                                     "                    "
                                     "                    "
                                     "                    "
                                     "\n";
}
XmpMetadata::XmpMetadata()
: mbWritten(false)
, mnPDF_A(0)
{
}
void XmpMetadata::write()
{
mpMemoryStream = std::make_unique<SvMemoryStream>(4096 /*Initial*/, 64 /*Resize*/);
// Header
mpMemoryStream->WriteOString("<?xpacket begin=\"");
mpMemoryStream->WriteOString(OUStringToOString(OUString(u'\xFEFF'), RTL_TEXTENCODING_UTF8));
mpMemoryStream->WriteOString("\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n");
{
tools::XmlWriter aXmlWriter(mpMemoryStream.get());
aXmlWriter.startDocument(2, false);
aXmlWriter.startElement("x", "xmpmeta", "adobe:ns:meta/");
aXmlWriter.startElement("rdf", "RDF", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");
// PDF/A part ( ISO 19005-1:2005 - 6.7.11 )
if (mnPDF_A > 0)
{
OString sPdfVersion = OString::number(mnPDF_A);
OString sPdfConformance = (mnPDF_A == 1) ? "A" : "B";
aXmlWriter.startElement("rdf:Description");
aXmlWriter.attribute("rdf:about", OString(""));
aXmlWriter.attribute("xmlns:pdfaid", OString("http://www.aiim.org/pdfa/ns/id/"));
aXmlWriter.startElement("pdfaid:part");
aXmlWriter.content(sPdfVersion);
aXmlWriter.endElement();
aXmlWriter.startElement("pdfaid:conformance");
aXmlWriter.content(sPdfConformance);
aXmlWriter.endElement();
aXmlWriter.endElement();
}
// Dublin Core properties
if (!msTitle.isEmpty() || !msAuthor.isEmpty() || !msSubject.isEmpty())
{
aXmlWriter.startElement("rdf:Description");
aXmlWriter.attribute("rdf:about", OString(""));
aXmlWriter.attribute("xmlns:dc", OString("http://purl.org/dc/elements/1.1/"));
if (!msTitle.isEmpty())
{
// this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
aXmlWriter.startElement("dc:title");
aXmlWriter.startElement("rdf:Alt");
aXmlWriter.startElement("rdf:li");
aXmlWriter.attribute("xml:lang", OString("x-default"));
aXmlWriter.content(msTitle);
aXmlWriter.endElement();
aXmlWriter.endElement();
aXmlWriter.endElement();
}
if (!msAuthor.isEmpty())
{
aXmlWriter.startElement("dc:creator");
aXmlWriter.startElement("rdf:Seq");
aXmlWriter.startElement("rdf:li");
aXmlWriter.content(msAuthor);
aXmlWriter.endElement();
aXmlWriter.endElement();
aXmlWriter.endElement();
}
if (!msSubject.isEmpty())
{
aXmlWriter.startElement("dc:description");
aXmlWriter.startElement("rdf:Alt");
aXmlWriter.startElement("rdf:li");
aXmlWriter.attribute("xml:lang", OString("x-default"));
aXmlWriter.content(msSubject);
aXmlWriter.endElement();
aXmlWriter.endElement();
aXmlWriter.endElement();
}
aXmlWriter.endElement();
}
// PDF properties
if (!msProducer.isEmpty() || !msKeywords.isEmpty())
{
aXmlWriter.startElement("rdf:Description");
aXmlWriter.attribute("rdf:about", OString(""));
aXmlWriter.attribute("xmlns:pdf", OString("http://ns.adobe.com/pdf/1.3/"));
if (!msProducer.isEmpty())
{
aXmlWriter.startElement("pdf:Producer");
aXmlWriter.content(msProducer);
aXmlWriter.endElement();
}
if (!msKeywords.isEmpty())
{
aXmlWriter.startElement("pdf:Keywords");
aXmlWriter.content(msKeywords);
aXmlWriter.endElement();
}
aXmlWriter.endElement();
}
aXmlWriter.endElement();
aXmlWriter.endElement();
aXmlWriter.endDocument();
}
// add padding (needed so the metadata can be changed in-place"
for (sal_Int32 nSpaces = 1; nSpaces <= 21; nSpaces++)
mpMemoryStream->WriteOString(constPadding);
mpMemoryStream->WriteOString("<?xpacket end=\"w\"?>\n");
mbWritten = true;
}
// Returns the size reported by the internal memory stream, serialising the
// packet first if that has not happened yet.
sal_uInt64 XmpMetadata::getSize()
{
    const bool bNeedsSerialising = !mbWritten;
    if (bNeedsSerialising)
    {
        write();
    }

    return mpMemoryStream->GetSize();
}
// Returns the data pointer of the internal memory stream, serialising the
// packet first if that has not happened yet.
const void* XmpMetadata::getData()
{
    const bool bNeedsSerialising = !mbWritten;
    if (bNeedsSerialising)
    {
        write();
    }

    return mpMemoryStream->GetData();
}
} // end vcl::pdf
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */