tdf#140606 make PDF parsing more lenient and prevent a crash
If the external document can't be opened, the export tried to continue
anyway, which eventually led to a crash. This is fixed by handling
this situation and preventing the crash; however, the affected part
of the document isn't exported in this case.
The document couldn't be opened because of a parsing error - there
was an unexpected null character instead of a whitespace, which
made the parser panic. Fix this by making the parser more lenient
in such a situation: when there is an unexpected null, warn and try
to continue parsing.
The bug document seems to be created with a buggy PDF writer, but other
PDF readers don't complain when parsing the document, so it looks to
be valid. qpdf --check doesn't complain either.
Change-Id: I61eb281e821ccd195ef006d778556e25d1c7f5e3
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111820
Tested-by: Jenkins
Reviewed-by: Tomaž Vajngerl <quikee@gmail.com>
(cherry picked from commit 2c1ed5a5dad827cde032f27a4348e81be15889bc)
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/111857
Reviewed-by: Adolfo Jayme Barrientos <fitojb@ubuntu.com>
diff --git a/vcl/inc/pdf/ExternalPDFStreams.hxx b/vcl/inc/pdf/ExternalPDFStreams.hxx
index ab3d057..0a1997f 100644
--- a/vcl/inc/pdf/ExternalPDFStreams.hxx
+++ b/vcl/inc/pdf/ExternalPDFStreams.hxx
@@ -33,21 +33,25 @@
std::map<sal_Int32, sal_Int32>& getCopiedResources() { return maCopiedResources; }
filter::PDFDocument& getPDFDocument()
std::shared_ptr<filter::PDFDocument>& getPDFDocument()
{
if (!mpPDFDocument)
{
SvMemoryStream aPDFStream;
aPDFStream.WriteBytes(maData.data(), maData.size());
aPDFStream.Seek(0);
mpPDFDocument = std::make_shared<filter::PDFDocument>();
if (!mpPDFDocument->Read(aPDFStream))
auto pPDFDocument = std::make_shared<filter::PDFDocument>();
if (!pPDFDocument->Read(aPDFStream))
{
SAL_WARN("vcl.pdfwriter",
"PDFWriterImpl::writeReferenceXObject: reading the PDF document failed");
}
else
{
mpPDFDocument = pPDFDocument;
}
}
return *mpPDFDocument;
return mpPDFDocument;
}
};
diff --git a/vcl/source/filter/ipdf/pdfdocument.cxx b/vcl/source/filter/ipdf/pdfdocument.cxx
index 41c44bd..804713a 100644
--- a/vcl/source/filter/ipdf/pdfdocument.cxx
+++ b/vcl/source/filter/ipdf/pdfdocument.cxx
@@ -1441,12 +1441,18 @@
}
else
{
if (!rtl::isAsciiWhiteSpace(static_cast<unsigned char>(ch)))
auto uChar = static_cast<unsigned char>(ch);
// Be more lenient and allow unexpected null char
if (!rtl::isAsciiWhiteSpace(uChar) && uChar != 0)
{
SAL_WARN("vcl.filter", "PDFDocument::Tokenize: unexpected character: "
<< ch << " at byte position " << rStream.Tell());
SAL_WARN("vcl.filter",
"PDFDocument::Tokenize: unexpected character with code "
<< sal_Int32(ch) << " at byte position " << rStream.Tell());
return false;
}
SAL_WARN_IF(uChar == 0, "vcl.filter",
"PDFDocument::Tokenize: unexpected null character at "
<< rStream.Tell() << " - ignoring");
}
break;
}
diff --git a/vcl/source/gdi/pdfwriter_impl.cxx b/vcl/source/gdi/pdfwriter_impl.cxx
index 951cda0f..eeeb4305 100644
--- a/vcl/source/gdi/pdfwriter_impl.cxx
+++ b/vcl/source/gdi/pdfwriter_impl.cxx
@@ -8459,10 +8459,16 @@
// object.
if (rEmit.m_nExternalPDFDataIndex < 0)
return;
auto & rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
auto & rPDFDocument = rExternalPDFStream.getPDFDocument();
auto& rExternalPDFStream = m_aExternalPDFStreams.get(rEmit.m_nExternalPDFDataIndex);
auto& pPDFDocument = rExternalPDFStream.getPDFDocument();
if (!pPDFDocument)
{
// Couldn't parse the document and can't continue
SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: failed to parse the document");
return;
}
std::vector<filter::PDFObjectElement*> aPages = rPDFDocument.GetPages();
std::vector<filter::PDFObjectElement*> aPages = pPDFDocument->GetPages();
if (aPages.empty())
{
SAL_WARN("vcl.pdfwriter", "PDFWriterImpl::writeReferenceXObject: no pages");