pdf: extract XMP metadata writing and use XmlWriter

Write the XMP metadata with XmlWriter instead of assembling it in
a string buffer, and extract the XMP metadata writing into its own
class, vcl::pdf::XmpMetadata.

This also requires a change to XmlWriter so that it can skip writing
the classic XML header: '<?xml version="1.0" ... ?>'

Change-Id: I95ea0e7ba58e7c43a0c707bf9c676994210ff104
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/85908
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
Tested-by: Tomaž Vajngerl <quikee@gmail.com>
diff --git a/include/tools/XmlWriter.hxx b/include/tools/XmlWriter.hxx
index da056c6..7efe3a5 100644
--- a/include/tools/XmlWriter.hxx
+++ b/include/tools/XmlWriter.hxx
@@ -40,7 +40,7 @@ public:

    ~XmlWriter();

    bool startDocument(sal_Int32 nIndent = 2);
    bool startDocument(sal_Int32 nIndent = 2, bool bWriteXmlHeader = true);
    void endDocument();

    void startElement(const OString& sName);
diff --git a/tools/source/xml/XmlWriter.cxx b/tools/source/xml/XmlWriter.cxx
index 3400a6e..a314eed 100644
--- a/tools/source/xml/XmlWriter.cxx
+++ b/tools/source/xml/XmlWriter.cxx
@@ -36,11 +36,13 @@ struct XmlWriterImpl
    // Binds the implementation to the target stream. No libxml2 writer exists
    // yet (mpWriter is null) and the XML header is enabled by default;
    // XmlWriter::startDocument() overwrites the header flag.
    XmlWriterImpl(SvStream* pStream)
        : mpStream(pStream)
        , mpWriter(nullptr)
        , mbWriteXmlHeader(true)
    {
    }

    SvStream* const mpStream;
    xmlTextWriterPtr mpWriter;
    bool mbWriteXmlHeader;
};

XmlWriter::XmlWriter(SvStream* pStream)
@@ -54,21 +56,24 @@ XmlWriter::~XmlWriter()
        endDocument();
}

// Creates the libxml2 text writer on top of the wrapped stream and optionally
// starts the document with a UTF-8 XML declaration.
//
// nIndent         - value handed to xmlTextWriterSetIndent()
// bWriteXmlHeader - remembered in the impl; when true the document is started
//                   via xmlTextWriterStartDocument()
// Returns false when xmlNewTextWriter() fails, true otherwise.
//
// NOTE(review): this text is a diff hunk whose +/- markers were stripped. The
// first signature line and the unconditional xmlTextWriterStartDocument() call
// below are presumably the removed pre-change lines; the two-parameter
// signature and the call guarded by mbWriteXmlHeader are the added ones.
bool XmlWriter::startDocument(sal_Int32 nIndent)
bool XmlWriter::startDocument(sal_Int32 nIndent, bool bWriteXmlHeader)
{
    mpImpl->mbWriteXmlHeader = bWriteXmlHeader;
    // Output buffer that forwards writes/close to mpStream through the callbacks
    xmlOutputBufferPtr xmlOutBuffer
        = xmlOutputBufferCreateIO(funcWriteCallback, funcCloseCallback, mpImpl->mpStream, nullptr);
    mpImpl->mpWriter = xmlNewTextWriter(xmlOutBuffer);
    // NOTE(review): xmlOutBuffer is not released on this failure path; libxml2
    // presumably only takes ownership when xmlNewTextWriter() succeeds - confirm.
    if (mpImpl->mpWriter == nullptr)
        return false;
    xmlTextWriterSetIndent(mpImpl->mpWriter, nIndent);
    xmlTextWriterStartDocument(mpImpl->mpWriter, nullptr, "UTF-8", nullptr);
    if (mpImpl->mbWriteXmlHeader)
        xmlTextWriterStartDocument(mpImpl->mpWriter, nullptr, "UTF-8", nullptr);
    return true;
}

// Finishes the document (only when it was started with the XML header), frees
// the libxml2 writer and resets mpWriter to null.
//
// NOTE(review): this text is a diff hunk whose +/- markers were stripped. The
// unconditional xmlTextWriterEndDocument() call is presumably the removed
// pre-change line; the call guarded by mbWriteXmlHeader is the added one.
// NOTE(review): when the header is disabled, xmlTextWriterEndDocument() is
// skipped, so callers presumably have to close every element themselves - confirm.
void XmlWriter::endDocument()
{
    xmlTextWriterEndDocument(mpImpl->mpWriter);
    if (mpImpl->mbWriteXmlHeader)
        xmlTextWriterEndDocument(mpImpl->mpWriter);
    xmlFreeTextWriter(mpImpl->mpWriter);
    mpImpl->mpWriter = nullptr;
}
diff --git a/vcl/Library_vcl.mk b/vcl/Library_vcl.mk
index b401d81..0bc2715 100644
--- a/vcl/Library_vcl.mk
+++ b/vcl/Library_vcl.mk
@@ -448,6 +448,7 @@ $(eval $(call gb_Library_add_exception_objects,vcl,\
    vcl/source/fontsubset/sft \
    vcl/source/fontsubset/ttcr \
    vcl/source/fontsubset/xlat \
    vcl/source/pdf/XmpMetadata \
    vcl/source/uitest/logger \
    vcl/source/uitest/uiobject \
    vcl/source/uitest/uitest \
diff --git a/vcl/inc/pdf/XmpMetadata.hxx b/vcl/inc/pdf/XmpMetadata.hxx
new file mode 100644
index 0000000..d9f9cac
--- /dev/null
+++ b/vcl/inc/pdf/XmpMetadata.hxx
@@ -0,0 +1,47 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 */

#ifndef INCLUDED_VCL_INC_PDF_XMPMETADATA_HXX
#define INCLUDED_VCL_INC_PDF_XMPMETADATA_HXX

#include <vcl/dllapi.h>
#include <rtl/string.hxx>
#include <tools/stream.hxx>
#include <memory>

namespace vcl::pdf
{
/**
 * Collects document metadata and serializes it as an XMP packet.
 *
 * Fill in the public fields, then call getSize() / getData(); the packet is
 * produced on the first of those calls and kept in an in-memory stream.
 * Changing a field after the packet has been produced has no effect on it.
 */
class XmpMetadata
{
    /// true once write() has produced the packet
    bool mbWritten;
    /// holds the serialized packet; created by write()
    std::unique_ptr<SvMemoryStream> mpMemoryStream;

public:
    // Text fields; each one is only emitted when it is non-empty.
    // NOTE(review): write() passes these to XmlWriter::content(), which
    // presumably escapes XML special characters itself - callers should then
    // store raw (unescaped) UTF-8 here; confirm against XmlWriter.
    OString msTitle; ///< written as dc:title
    OString msAuthor; ///< written as dc:creator
    OString msSubject; ///< written as dc:description
    OString msProducer; ///< written as pdf:Producer
    OString msKeywords; ///< written as pdf:Keywords
    /// PDF/A part number; values > 0 emit the pdfaid block (1 -> conformance
    /// "A", anything else -> "B"), 0 omits it
    sal_Int32 mnPDF_A;

    XmpMetadata();

    /// Size of the serialized packet as reported by the memory stream;
    /// serializes first if that has not happened yet.
    sal_uInt64 getSize();

    /// Data pointer of the memory stream holding the serialized packet;
    /// serializes first if that has not happened yet.
    const void* getData();

private:
    /// Serializes the current field values into mpMemoryStream.
    void write();
};
}

#endif

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index f5fb555..891d5db 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -74,6 +74,7 @@
#include <textlineinfo.hxx>
#include <bitmapwriteaccess.hxx>
#include <impglyphitem.hxx>
#include <pdf/XmpMetadata.hxx>

#include "pdfwriter_impl.hxx"

@@ -5232,131 +5233,43 @@ sal_Int32 PDFWriterImpl::emitDocumentMetadata()

    if( updateObject( nObject ) )
    {
        // the following string are written in UTF-8 unicode
        OStringBuffer aMetadataStream( 8192 );
        pdf::XmpMetadata aMetadata;

        aMetadataStream.append( "<?xpacket begin=\"" );
        // these lines write Unicode "zero width non-breaking space character" (U+FEFF)
        // (aka byte-order mark ) used as a byte-order marker.
        aMetadataStream.append( OUStringToOString( OUString( u'\xFEFF' ), RTL_TEXTENCODING_UTF8 ) );
        aMetadataStream.append( "\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n" );
        aMetadataStream.append( "<x:xmpmeta xmlns:x=\"adobe:ns:meta/\">\n" );
        aMetadataStream.append( " <rdf:RDF xmlns:rdf=\"http://www.w3.org/1999/02/22-rdf-syntax-ns#\">\n" );
        //PDF/A part ( ISO 19005-1:2005 - 6.7.11 )
        aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
        aMetadataStream.append( "      xmlns:pdfaid=\"http://www.aiim.org/pdfa/ns/id/\">\n" );
        if( m_bIsPDF_A2 )
        {
            aMetadataStream.append( "   <pdfaid:part>2</pdfaid:part>\n" );
            aMetadataStream.append( "   <pdfaid:conformance>B</pdfaid:conformance>\n" );
        }
        else
        {
            aMetadataStream.append( "   <pdfaid:part>1</pdfaid:part>\n" );
            aMetadataStream.append( "   <pdfaid:conformance>A</pdfaid:conformance>\n" );
        }
        aMetadataStream.append( "  </rdf:Description>\n" );
        //... Dublin Core properties go here
        if( !m_aContext.DocumentInfo.Title.isEmpty() ||
            !m_aContext.DocumentInfo.Author.isEmpty() ||
            !m_aContext.DocumentInfo.Subject.isEmpty() )
        {
            aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
            aMetadataStream.append( "      xmlns:dc=\"http://purl.org/dc/elements/1.1/\">\n" );
            if( !m_aContext.DocumentInfo.Title.isEmpty() )
            {
                // this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
                aMetadataStream.append( "   <dc:title>\n" );
                aMetadataStream.append( "    <rdf:Alt>\n" );
                aMetadataStream.append( "     <rdf:li xml:lang=\"x-default\">" );
                OUString aTitle;
                escapeStringXML( m_aContext.DocumentInfo.Title, aTitle );
                aMetadataStream.append( OUStringToOString( aTitle, RTL_TEXTENCODING_UTF8 )  );
                aMetadataStream.append( "</rdf:li>\n" );
                aMetadataStream.append( "    </rdf:Alt>\n" );
                aMetadataStream.append( "   </dc:title>\n" );
            }
            if( !m_aContext.DocumentInfo.Author.isEmpty() )
            {
                aMetadataStream.append( "   <dc:creator>\n" );
                aMetadataStream.append( "    <rdf:Seq>\n" );
                aMetadataStream.append( "     <rdf:li>" );
                OUString aAuthor;
                escapeStringXML( m_aContext.DocumentInfo.Author, aAuthor );
                aMetadataStream.append( OUStringToOString( aAuthor , RTL_TEXTENCODING_UTF8 )  );
                aMetadataStream.append( "</rdf:li>\n" );
                aMetadataStream.append( "    </rdf:Seq>\n" );
                aMetadataStream.append( "   </dc:creator>\n" );
            }
            if( !m_aContext.DocumentInfo.Subject.isEmpty() )
            {
                // this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
                aMetadataStream.append( "   <dc:description>\n" );
                aMetadataStream.append( "    <rdf:Alt>\n" );
                aMetadataStream.append( "     <rdf:li xml:lang=\"x-default\">" );
                OUString aSubject;
                escapeStringXML( m_aContext.DocumentInfo.Subject, aSubject );
                aMetadataStream.append( OUStringToOString( aSubject , RTL_TEXTENCODING_UTF8 )  );
                aMetadataStream.append( "</rdf:li>\n" );
                aMetadataStream.append( "    </rdf:Alt>\n" );
                aMetadataStream.append( "   </dc:description>\n" );
            }
            aMetadataStream.append( "  </rdf:Description>\n" );
        }
        if (m_bIsPDF_A1)
            aMetadata.mnPDF_A = 1;
        else if (m_bIsPDF_A2)
            aMetadata.mnPDF_A = 2;

        //... PDF properties go here
        if( !m_aContext.DocumentInfo.Producer.isEmpty() ||
            !m_aContext.DocumentInfo.Keywords.isEmpty() )
        if (!m_aContext.DocumentInfo.Title.isEmpty())
        {
            aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
            aMetadataStream.append( "     xmlns:pdf=\"http://ns.adobe.com/pdf/1.3/\">\n" );
            if( !m_aContext.DocumentInfo.Producer.isEmpty() )
            {
                aMetadataStream.append( "   <pdf:Producer>" );
                OUString aProducer;
                escapeStringXML( m_aContext.DocumentInfo.Producer, aProducer );
                aMetadataStream.append( OUStringToOString( aProducer , RTL_TEXTENCODING_UTF8 )  );
                aMetadataStream.append( "</pdf:Producer>\n" );
            }
            if( !m_aContext.DocumentInfo.Keywords.isEmpty() )
            {
                aMetadataStream.append( "   <pdf:Keywords>" );
                OUString aKeywords;
                escapeStringXML( m_aContext.DocumentInfo.Keywords, aKeywords );
                aMetadataStream.append( OUStringToOString( aKeywords , RTL_TEXTENCODING_UTF8 )  );
                aMetadataStream.append( "</pdf:Keywords>\n" );
            }
            aMetadataStream.append( "  </rdf:Description>\n" );
            OUString aTempString;
            escapeStringXML(m_aContext.DocumentInfo.Title, aTempString);
            aMetadata.msTitle = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
        }

        aMetadataStream.append( "  <rdf:Description rdf:about=\"\"\n" );
        aMetadataStream.append( "    xmlns:xmp=\"http://ns.adobe.com/xap/1.0/\">\n" );
        if( !m_aContext.DocumentInfo.Creator.isEmpty() )
        if (!m_aContext.DocumentInfo.Author.isEmpty())
        {
            aMetadataStream.append( "   <xmp:CreatorTool>" );
            OUString aCreator;
            escapeStringXML( m_aContext.DocumentInfo.Creator, aCreator );
            aMetadataStream.append( OUStringToOString( aCreator , RTL_TEXTENCODING_UTF8 )  );
            aMetadataStream.append( "</xmp:CreatorTool>\n" );
            OUString aTempString;
            escapeStringXML(m_aContext.DocumentInfo.Author, aTempString);
            aMetadata.msAuthor = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
        }
        //creation date
        aMetadataStream.append( "   <xmp:CreateDate>" );
        aMetadataStream.append( m_aCreationMetaDateString );
        aMetadataStream.append( "</xmp:CreateDate>\n" );

        aMetadataStream.append( "  </rdf:Description>\n" );
        aMetadataStream.append( " </rdf:RDF>\n" );
        aMetadataStream.append( "</x:xmpmeta>\n" );

        //add the padding
        for( sal_Int32 nSpaces = 1; nSpaces <= 2100; nSpaces++ )
        if (!m_aContext.DocumentInfo.Subject.isEmpty())
        {
            aMetadataStream.append( " " );
            if( nSpaces % 100 == 0 )
                aMetadataStream.append( "\n" );
            OUString aTempString;
            escapeStringXML(m_aContext.DocumentInfo.Subject, aTempString);
            aMetadata.msSubject = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
        }

        aMetadataStream.append( "<?xpacket end=\"w\"?>\n" );
        if (!m_aContext.DocumentInfo.Producer.isEmpty())
        {
            OUString aTempString;
            escapeStringXML(m_aContext.DocumentInfo.Producer, aTempString);
            aMetadata.msProducer = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
        }
        if (!m_aContext.DocumentInfo.Keywords.isEmpty())
        {
            OUString aTempString;
            escapeStringXML(m_aContext.DocumentInfo.Keywords, aTempString);
            aMetadata.msKeywords = OUStringToOString(aTempString, RTL_TEXTENCODING_UTF8);
        }

        OStringBuffer aMetadataObj( 1024 );

@@ -5365,12 +5278,12 @@ sal_Int32 PDFWriterImpl::emitDocumentMetadata()

        aMetadataObj.append( "<</Type/Metadata/Subtype/XML/Length " );

        aMetadataObj.append( aMetadataStream.getLength() );
        aMetadataObj.append( sal_Int32(aMetadata.getSize()) );
        aMetadataObj.append( ">>\nstream\n" );
        if ( !writeBuffer( aMetadataObj.getStr(), aMetadataObj.getLength() ) )
            return 0;
        //emit the stream
        if ( !writeBuffer( aMetadataStream.getStr(), aMetadataStream.getLength() ) )
        if ( !writeBuffer( aMetadata.getData(), aMetadata.getSize() ) )
            return 0;

        aMetadataObj.setLength( 0 );
diff --git a/vcl/source/pdf/XmpMetadata.cxx b/vcl/source/pdf/XmpMetadata.cxx
new file mode 100644
index 0000000..d9033f4
--- /dev/null
+++ b/vcl/source/pdf/XmpMetadata.cxx
@@ -0,0 +1,159 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 */

#include <pdf/XmpMetadata.hxx>
#include <tools/XmlWriter.hxx>

namespace vcl::pdf
{
namespace
{
constexpr const char* constPadding = "                                        "
                                     "                                        "
                                     "                                        "
                                     "                                        "
                                     "                                        "
                                     "\n";
}

XmpMetadata::XmpMetadata()
    : mbWritten(false)
    , mnPDF_A(0)
{
}

void XmpMetadata::write()
{
    mpMemoryStream = std::make_unique<SvMemoryStream>(4096 /*Initial*/, 64 /*Resize*/);

    // Header
    mpMemoryStream->WriteOString("<?xpacket begin=\"");
    mpMemoryStream->WriteOString(OUStringToOString(OUString(u'\xFEFF'), RTL_TEXTENCODING_UTF8));
    mpMemoryStream->WriteOString("\" id=\"W5M0MpCehiHzreSzNTczkc9d\"?>\n");

    {
        tools::XmlWriter aXmlWriter(mpMemoryStream.get());
        aXmlWriter.startDocument(2, false);
        aXmlWriter.startElement("x", "xmpmeta", "adobe:ns:meta/");
        aXmlWriter.startElement("rdf", "RDF", "http://www.w3.org/1999/02/22-rdf-syntax-ns#");

        // PDF/A part ( ISO 19005-1:2005 - 6.7.11 )
        if (mnPDF_A > 0)
        {
            OString sPdfVersion = OString::number(mnPDF_A);
            OString sPdfConformance = (mnPDF_A == 1) ? "A" : "B";

            aXmlWriter.startElement("rdf:Description");
            aXmlWriter.attribute("rdf:about", OString(""));
            aXmlWriter.attribute("xmlns:pdfaid", OString("http://www.aiim.org/pdfa/ns/id/"));

            aXmlWriter.startElement("pdfaid:part");
            aXmlWriter.content(sPdfVersion);
            aXmlWriter.endElement();

            aXmlWriter.startElement("pdfaid:conformance");
            aXmlWriter.content(sPdfConformance);
            aXmlWriter.endElement();

            aXmlWriter.endElement();
        }

        // Dublin Core properties
        if (!msTitle.isEmpty() || !msAuthor.isEmpty() || !msSubject.isEmpty())
        {
            aXmlWriter.startElement("rdf:Description");
            aXmlWriter.attribute("rdf:about", OString(""));
            aXmlWriter.attribute("xmlns:dc", OString("http://purl.org/dc/elements/1.1/"));
            if (!msTitle.isEmpty())
            {
                // this is according to PDF/A-1, technical corrigendum 1 (2007-04-01)
                aXmlWriter.startElement("dc:title");
                aXmlWriter.startElement("rdf:Alt");
                aXmlWriter.startElement("rdf:li");
                aXmlWriter.attribute("xml:lang", OString("x-default"));
                aXmlWriter.content(msTitle);
                aXmlWriter.endElement();
                aXmlWriter.endElement();
                aXmlWriter.endElement();
            }
            if (!msAuthor.isEmpty())
            {
                aXmlWriter.startElement("dc:creator");
                aXmlWriter.startElement("rdf:Seq");
                aXmlWriter.startElement("rdf:li");
                aXmlWriter.content(msAuthor);
                aXmlWriter.endElement();
                aXmlWriter.endElement();
                aXmlWriter.endElement();
            }
            if (!msSubject.isEmpty())
            {
                aXmlWriter.startElement("dc:description");
                aXmlWriter.startElement("rdf:Alt");
                aXmlWriter.startElement("rdf:li");
                aXmlWriter.attribute("xml:lang", OString("x-default"));
                aXmlWriter.content(msSubject);
                aXmlWriter.endElement();
                aXmlWriter.endElement();
                aXmlWriter.endElement();
            }
            aXmlWriter.endElement();
        }

        // PDF properties
        if (!msProducer.isEmpty() || !msKeywords.isEmpty())
        {
            aXmlWriter.startElement("rdf:Description");
            aXmlWriter.attribute("rdf:about", OString(""));
            aXmlWriter.attribute("xmlns:pdf", OString("http://ns.adobe.com/pdf/1.3/"));
            if (!msProducer.isEmpty())
            {
                aXmlWriter.startElement("pdf:Producer");
                aXmlWriter.content(msProducer);
                aXmlWriter.endElement();
            }
            if (!msKeywords.isEmpty())
            {
                aXmlWriter.startElement("pdf:Keywords");
                aXmlWriter.content(msKeywords);
                aXmlWriter.endElement();
            }
            aXmlWriter.endElement();
        }
        aXmlWriter.endElement();
        aXmlWriter.endElement();
        aXmlWriter.endDocument();
    }

    // add padding (needed so the metadata can be changed in-place"
    for (sal_Int32 nSpaces = 1; nSpaces <= 21; nSpaces++)
        mpMemoryStream->WriteOString(constPadding);

    mpMemoryStream->WriteOString("<?xpacket end=\"w\"?>\n");
    mbWritten = true;
}

/// Returns the size reported by the memory stream that holds the packet,
/// serializing the packet first if that has not happened yet.
sal_uInt64 XmpMetadata::getSize()
{
    if (!mbWritten)
    {
        write();
    }

    const sal_uInt64 nPacketSize = mpMemoryStream->GetSize();
    return nPacketSize;
}

/// Returns the data pointer of the memory stream that holds the packet,
/// serializing the packet first if that has not happened yet.
const void* XmpMetadata::getData()
{
    if (!mbWritten)
    {
        write();
    }

    const void* pPacket = mpMemoryStream->GetData();
    return pPacket;
}

} // end vcl::pdf

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */