xmlsecurity PDF verify: last batch of various fixes

This is a combination of 5 commits:

1) xmlsecurity PDF verify: don't abort read on partial sign

Map it to the partially signed (not all streams) ODF concept instead.

(cherry picked from commit e84993486b46ed86a8540b985355e82db5559720)

2) xmlsecurity PDF verify: fix reading names containing ']'

Also fix parsing '<< /Foo [ /Bar ] >>'.

(cherry picked from commit cdf2ae1b6611976816fa60aae370893657c622d0)

3) xmlsecurity PDF verify: handle no EOL at EOF

From a comment's point of view, EOF is just a terminator, similar to \r
or \n.

(cherry picked from commit b1f91c0a04dd751d4f6cb8352bcbaa16c9388285)

4) xmlsecurity PDF verify: avoid seeking before the start of the stream

Happened when the doc was smaller than 1024 bytes.

(cherry picked from commit c4cb8b5d1460bbf080366817d26c08685490d541)

5) xmlsecurity PDF verify: don't hide signatures where digest match is uncertain

Use case: the bugdoc has 2 signatures, one normal one and one with
SubFilter=ETSI.RFC3161. By not hiding the second signature it's possible
to counter-sign the document, even if we don't handle the contents of
the second one.

(cherry picked from commit 61c81c4500e5d5849b43d3a9d3efdabba94d513b)

Change-Id: I580e1211072ec9839f01b529b569c98b702b6534
Reviewed-on: https://gerrit.libreoffice.org/31557
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
diff --git a/xmlsecurity/inc/sigstruct.hxx b/xmlsecurity/inc/sigstruct.hxx
index c217352..29eeb72 100644
--- a/xmlsecurity/inc/sigstruct.hxx
+++ b/xmlsecurity/inc/sigstruct.hxx
@@ -106,6 +106,8 @@ struct SignatureInformation
    sal_Int32 nDigestID;
    /// For PDF: has id-aa-signingCertificateV2 as a signed attribute.
    bool bHasSigningCertificate;
    /// For PDF: the byte range doesn't cover the whole document.
    bool bPartialDocumentSignature;

    SignatureInformation( sal_Int32 nId )
    {
@@ -113,6 +115,7 @@ struct SignatureInformation
        nStatus = css::xml::crypto::SecurityOperationStatus_UNKNOWN;
        nDigestID = 0;
        bHasSigningCertificate = false;
        bPartialDocumentSignature = false;
    }
};

diff --git a/xmlsecurity/qa/unit/pdfsigning/data/name-bracket.pdf b/xmlsecurity/qa/unit/pdfsigning/data/name-bracket.pdf
new file mode 100644
index 0000000..ac15279
--- /dev/null
+++ b/xmlsecurity/qa/unit/pdfsigning/data/name-bracket.pdf
Binary files differ
diff --git a/xmlsecurity/qa/unit/pdfsigning/data/noeol.pdf b/xmlsecurity/qa/unit/pdfsigning/data/noeol.pdf
new file mode 100644
index 0000000..d870f89
--- /dev/null
+++ b/xmlsecurity/qa/unit/pdfsigning/data/noeol.pdf
Binary files differ
diff --git a/xmlsecurity/qa/unit/pdfsigning/data/partial.pdf b/xmlsecurity/qa/unit/pdfsigning/data/partial.pdf
new file mode 100644
index 0000000..890f562
--- /dev/null
+++ b/xmlsecurity/qa/unit/pdfsigning/data/partial.pdf
Binary files differ
diff --git a/xmlsecurity/qa/unit/pdfsigning/data/small.pdf b/xmlsecurity/qa/unit/pdfsigning/data/small.pdf
new file mode 100644
index 0000000..6067545
--- /dev/null
+++ b/xmlsecurity/qa/unit/pdfsigning/data/small.pdf
Binary files differ
diff --git a/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx b/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx
index 1ecbb22..2da9c0e 100644
--- a/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx
+++ b/xmlsecurity/qa/unit/pdfsigning/pdfsigning.cxx
@@ -64,12 +64,16 @@ public:
    void testPDF14LOWin();
    /// Test a PAdES document, signed by LO on Linux.
    void testPDFPAdESGood();
    /// Test a valid signature that does not cover the whole file.
    void testPartial();
    /// Test writing a PAdES signature.
    void testSigningCertificateAttribute();
    /// Test that we accept files which are supposed to be good.
    void testGood();
    /// Test that we don't crash / loop while tokenizing these files.
    void testTokenize();
    /// Test handling of unknown SubFilter values.
    void testUnknownSubFilter();

    CPPUNIT_TEST_SUITE(PDFSigningTest);
    CPPUNIT_TEST(testPDFAdd);
@@ -81,9 +85,11 @@ public:
    CPPUNIT_TEST(testPDF16Add);
    CPPUNIT_TEST(testPDF14LOWin);
    CPPUNIT_TEST(testPDFPAdESGood);
    CPPUNIT_TEST(testPartial);
    CPPUNIT_TEST(testSigningCertificateAttribute);
    CPPUNIT_TEST(testGood);
    CPPUNIT_TEST(testTokenize);
    CPPUNIT_TEST(testUnknownSubFilter);
    CPPUNIT_TEST_SUITE_END();
};

@@ -331,6 +337,14 @@ void PDFSigningTest::testPDFPAdESGood()
    verify(m_directories.getURLFromSrc(DATA_DIRECTORY) + "good-pades.pdf", 1, "ETSI.CAdES.detached");
}

void PDFSigningTest::testPartial()
{
    std::vector<SignatureInformation> aInfos = verify(m_directories.getURLFromSrc(DATA_DIRECTORY) + "partial.pdf", 1, /*rExpectedSubFilter=*/OString());
    CPPUNIT_ASSERT(!aInfos.empty());
    SignatureInformation& rInformation = aInfos[0];
    CPPUNIT_ASSERT(rInformation.bPartialDocumentSignature);
}

void PDFSigningTest::testSigningCertificateAttribute()
{
    // Create a new signature.
@@ -377,8 +391,12 @@ void PDFSigningTest::testTokenize()
    {
        // We looped on this broken input.
        OUStringLiteral("no-eof.pdf"),
        // Failed to read as \r wasn't handled as terminating a comment.
        OUStringLiteral("cr-comment.pdf"),
        // ']' in a name token was mishandled.
        OUStringLiteral("name-bracket.pdf"),
        // %%EOF at the end wasn't followed by a newline.
        OUStringLiteral("noeol.pdf"),
        // File that's intentionally smaller than 1024 bytes.
        OUStringLiteral("small.pdf"),
    };

    for (const auto& rName : aNames)
@@ -390,6 +408,22 @@ void PDFSigningTest::testTokenize()
    }
}

void PDFSigningTest::testUnknownSubFilter()
{
    // Tokenize the bugdoc.
    uno::Reference<xml::crypto::XSEInitializer> xSEInitializer = xml::crypto::SEInitializer::create(mxComponentContext);
    uno::Reference<xml::crypto::XXMLSecurityContext> xSecurityContext = xSEInitializer->createSecurityContext(OUString());
    SvStream* pStream = utl::UcbStreamHelper::CreateStream(m_directories.getURLFromSrc(DATA_DIRECTORY) + "cr-comment.pdf", StreamMode::READ | StreamMode::WRITE);
    uno::Reference<io::XStream> xStream(new utl::OStreamWrapper(*pStream));
    DocumentSignatureManager aManager(mxComponentContext, DocumentSignatureMode::Content);
    aManager.mxSignatureStream = xStream;
    aManager.read(/*bUseTempStream=*/false);

    // Make sure we find both signatures, even if the second has unknown SubFilter.
    std::vector<SignatureInformation>& rInformations = aManager.maCurrentSignatureInformations;
    CPPUNIT_ASSERT_EQUAL(static_cast<std::size_t>(2), rInformations.size());
}

CPPUNIT_TEST_SUITE_REGISTRATION(PDFSigningTest);

CPPUNIT_PLUGIN_IMPLEMENT();
diff --git a/xmlsecurity/source/helper/pdfsignaturehelper.cxx b/xmlsecurity/source/helper/pdfsignaturehelper.cxx
index a7cfbed..4218a83 100644
--- a/xmlsecurity/source/helper/pdfsignaturehelper.cxx
+++ b/xmlsecurity/source/helper/pdfsignaturehelper.cxx
@@ -56,10 +56,7 @@ bool PDFSignatureHelper::ReadAndVerifySignature(const uno::Reference<io::XInputS

        bool bLast = i == aSignatures.size() - 1;
        if (!xmlsecurity::pdfio::PDFDocument::ValidateSignature(*pStream, aSignatures[i], aInfo, bLast))
        {
            SAL_WARN("xmlsecurity.helper", "failed to determine digest match");
            continue;
        }

        m_aSignatureInfos.push_back(aInfo);
    }
@@ -82,6 +79,7 @@ uno::Sequence<security::DocumentSignatureInformation> PDFSignatureHelper::GetDoc
        security::DocumentSignatureInformation& rExternal = aRet[i];
        rExternal.SignatureIsValid = rInternal.nStatus == xml::crypto::SecurityOperationStatus_OPERATION_SUCCEEDED;
        rExternal.Signer = xSecEnv->createCertificateFromAscii(rInternal.ouX509Certificate);
        rExternal.PartialDocumentSignature = rInternal.bPartialDocumentSignature;

        // Verify certificate.
        if (rExternal.Signer.is())
diff --git a/xmlsecurity/source/pdfio/pdfdocument.cxx b/xmlsecurity/source/pdfio/pdfdocument.cxx
index b19a043..fd36477 100644
--- a/xmlsecurity/source/pdfio/pdfdocument.cxx
+++ b/xmlsecurity/source/pdfio/pdfdocument.cxx
@@ -1009,7 +1009,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            rElements.push_back(std::unique_ptr<PDFElement>(pComment));
            rStream.SeekRel(-1);
            if (!rElements.back()->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFCommentElement::Read() failed");
                return false;
            }
            if (eMode == TokenizeMode::EOF_TOKEN && !m_aEOFs.empty() && m_aEOFs.back() == rStream.Tell())
            {
                // Found EOF and partial parsing requested, we're done.
@@ -1030,7 +1033,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            else
                rElements.push_back(std::unique_ptr<PDFElement>(new PDFHexStringElement()));
            if (!rElements.back()->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFDictionaryElement::Read() failed");
                return false;
            }
            break;
        }
        case '>':
@@ -1039,7 +1045,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            --nDictionaryDepth;
            rStream.SeekRel(-1);
            if (!rElements.back()->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndDictionaryElement::Read() failed");
                return false;
            }
            break;
        }
        case '[':
@@ -1055,7 +1064,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            }
            rStream.SeekRel(-1);
            if (!rElements.back()->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFArrayElement::Read() failed");
                return false;
            }
            break;
        }
        case ']':
@@ -1064,7 +1076,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            pArray = nullptr;
            rStream.SeekRel(-1);
            if (!rElements.back()->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndArrayElement::Read() failed");
                return false;
            }
            break;
        }
        case '/':
@@ -1073,7 +1088,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            rElements.push_back(std::unique_ptr<PDFElement>(pNameElement));
            rStream.SeekRel(-1);
            if (!pNameElement->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFNameElement::Read() failed");
                return false;
            }
            if (pObject && pObjectKey && pObjectKey->GetValue() == "Type" && pNameElement->GetValue() == "ObjStm")
                pObjectStream = pObject;
            else
@@ -1085,7 +1103,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
            rElements.push_back(std::unique_ptr<PDFElement>(new PDFLiteralStringElement()));
            rStream.SeekRel(-1);
            if (!rElements.back()->Read(rStream))
            {
                SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFLiteralStringElement::Read() failed");
                return false;
            }
            break;
        }
        default:
@@ -1097,7 +1118,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                rElements.push_back(std::unique_ptr<PDFElement>(pNumberElement));
                rStream.SeekRel(-1);
                if (!pNumberElement->Read(rStream))
                {
                    SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFNumberElement::Read() failed");
                    return false;
                }
                if (bInStartXRef)
                {
                    bInStartXRef = false;
@@ -1147,7 +1171,10 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                            pArray->PushBack(rElements.back().get());
                    }
                    if (!rElements.back()->Read(rStream))
                    {
                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFElement::Read() failed");
                        return false;
                    }
                }
                else if (aKeyword == "stream")
                {
@@ -1189,19 +1216,28 @@ bool PDFDocument::Tokenize(SvStream& rStream, TokenizeMode eMode, std::vector< s
                        pObject->SetStream(pStreamElement);
                    rElements.push_back(std::unique_ptr<PDFElement>(pStreamElement));
                    if (!rElements.back()->Read(rStream))
                    {
                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFStreamElement::Read() failed");
                        return false;
                    }
                }
                else if (aKeyword == "endstream")
                {
                    rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndStreamElement()));
                    if (!rElements.back()->Read(rStream))
                    {
                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndStreamElement::Read() failed");
                        return false;
                    }
                }
                else if (aKeyword == "endobj")
                {
                    rElements.push_back(std::unique_ptr<PDFElement>(new PDFEndObjectElement()));
                    if (!rElements.back()->Read(rStream))
                    {
                        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::Tokenize: PDFEndObjectElement::Read() failed");
                        return false;
                    }
                    if (eMode == TokenizeMode::END_OF_OBJECT)
                    {
                        // Found endobj and only object parsing was requested, we're done.
@@ -1356,7 +1392,11 @@ size_t PDFDocument::FindStartXRef(SvStream& rStream)
    // Find the "startxref" token, somewhere near the end of the document.
    std::vector<char> aBuf(1024);
    rStream.Seek(STREAM_SEEK_TO_END);
    rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
    if (rStream.Tell() > aBuf.size())
        rStream.SeekRel(static_cast<sal_Int64>(-1) * aBuf.size());
    else
        // The document is really short, then just read it from the start.
        rStream.Seek(0);
    size_t nBeforePeek = rStream.Tell();
    size_t nSize = rStream.ReadBytes(aBuf.data(), aBuf.size());
    rStream.Seek(nBeforePeek);
@@ -2212,10 +2252,8 @@ bool PDFDocument::ValidateSignature(SvStream& rStream, PDFObjectElement* pSignat
    rStream.Seek(STREAM_SEEK_TO_END);
    size_t nFileEnd = rStream.Tell();
    if (bLast && (aByteRanges[1].first + aByteRanges[1].second) != nFileEnd)
    {
        SAL_WARN("xmlsecurity.pdfio", "PDFDocument::ValidateSignature: second range end is not the end of the file");
        return false;
    }
        // Second range end is not the end of the file.
        rInformation.bPartialDocumentSignature = true;

    // At this point there is no obviously missing info to validate the
    // signature.
@@ -2621,13 +2659,13 @@ PDFCommentElement::PDFCommentElement(PDFDocument& rDoc)

bool PDFCommentElement::Read(SvStream& rStream)
{
    // Read from (including) the % char till (excluding) the end of the line.
    // Read from (including) the % char till (excluding) the end of the line/stream.
    OStringBuffer aBuf;
    char ch;
    rStream.ReadChar(ch);
    while (!rStream.IsEof())
    while (true)
    {
        if (ch == '\n' || ch == '\r')
        if (ch == '\n' || ch == '\r' || rStream.IsEof())
        {
            m_aComment = aBuf.makeStringAndClear();

@@ -2908,14 +2946,19 @@ size_t PDFDictionaryElement::Parse(const std::vector< std::unique_ptr<PDFElement
            }
            else
            {
                // Name-name key-value.
                rDictionary[aName] = pName;
                if (pThisDictionary)
                if (pArray)
                    pArray->PushBack(pName);
                else
                {
                    pThisDictionary->SetKeyOffset(aName, nNameOffset);
                    pThisDictionary->SetKeyValueLength(aName, pName->GetLocation() + pName->GetLength() - nNameOffset);
                    // Name-name key-value.
                    rDictionary[aName] = pName;
                    if (pThisDictionary)
                    {
                        pThisDictionary->SetKeyOffset(aName, nNameOffset);
                        pThisDictionary->SetKeyValueLength(aName, pName->GetLocation() + pName->GetLength() - nNameOffset);
                    }
                    aName.clear();
                }
                aName.clear();
            }
            continue;
        }
@@ -3489,7 +3532,7 @@ bool PDFNameElement::Read(SvStream& rStream)
    rStream.ReadChar(ch);
    while (!rStream.IsEof())
    {
        if (isspace(ch) || ch == '/' || ch == '[' || ch == '<' || ch == '>' || ch == '(')
        if (isspace(ch) || ch == '/' || ch == '[' || ch == ']' || ch == '<' || ch == '>' || ch == '(')
        {
            rStream.SeekRel(-1);
            m_aValue = aBuf.makeStringAndClear();