tdf#96401: allow detecting a broken ZIP package

In deep detection, first check if it's a broken ZIP package. If it is,
set the RepairPackage media descriptor property to true.

Pass the RepairPackage value to the OOXML filter detection.

Change-Id: Ic958283f3cce92ac29ce93ac330cc9e409e3eb78
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159976
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
diff --git a/filter/Library_filterconfig.mk b/filter/Library_filterconfig.mk
index 38251aa..7d93187 100644
--- a/filter/Library_filterconfig.mk
+++ b/filter/Library_filterconfig.mk
@@ -41,6 +41,7 @@ $(eval $(call gb_Library_use_libraries,filterconfig,\
	cppu \
	sal \
	salhelper \
	sfx \
	i18nlangtag \
))

diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx
index a83a140..5bc4f98 100644
--- a/filter/source/config/cache/typedetection.cxx
+++ b/filter/source/config/cache/typedetection.cxx
@@ -25,9 +25,14 @@
#include <com/sun/star/util/URLTransformer.hpp>
#include <com/sun/star/util/XURLTransformer.hpp>

#include <com/sun/star/beans/XPropertySet.hpp>
#include <com/sun/star/embed/StorageFormats.hpp>
#include <com/sun/star/io/XInputStream.hpp>
#include <com/sun/star/io/XSeekable.hpp>
#include <com/sun/star/packages/zip/ZipIOException.hpp>
#include <com/sun/star/task/XInteractionHandler.hpp>

#include <sfx2/brokenpackageint.hxx>
#include <o3tl/string_view.hxx>
#include <tools/wldcrd.hxx>
#include <sal/log.hxx>
@@ -832,6 +837,50 @@ void TypeDetection::impl_getAllFormatTypes(
}


// Checks whether xStream contains a ZIP package that fails to open normally, but can be opened
// in repair mode with a non-empty result.
//
// xStream is passed as the first initialization argument of the ZipPackage service; xContext
// supplies the service manager used to instantiate that service.
//
// Returns the value of the "HasElements" property of the package opened with RepairPackage set.
// Returns false when the normal open does not throw ZipIOException, when the repair attempt
// throws a UNO exception, or when the property value cannot be extracted as a bool.
static bool isBrokenZIP(const css::uno::Reference<css::io::XInputStream>& xStream,
                        const css::uno::Reference<css::uno::XComponentContext>& xContext)
{
    std::vector<css::uno::Any> aArguments{
        css::uno::Any(xStream),
        css::uno::Any(css::beans::NamedValue("AllowRemoveOnInsert", css::uno::Any(false))),
        css::uno::Any(css::beans::NamedValue("StorageFormat",
                                             css::uno::Any(css::embed::StorageFormats::ZIP))),
    };

    // Phase 1: a regular open. ZipIOException is expected here for a broken ZIP package, and
    // also for something that is not a ZIP at all; only that outcome leads to phase 2.
    bool bRegularOpenFailed = false;
    try
    {
        xContext->getServiceManager()->createInstanceWithArgumentsAndContext(
            "com.sun.star.packages.comp.ZipPackage", comphelper::containerToSequence(aArguments),
            xContext);
    }
    catch (const css::packages::zip::ZipIOException&)
    {
        bRegularOpenFailed = true;
    }
    catch (const css::uno::Exception&)
    {
        // Some other failure: not something that repairing could help with
        return false;
    }

    if (!bRegularOpenFailed)
        return false; // Opened fine, so the package is not broken

    // Phase 2: the same open, this time asking the package to repair itself
    aArguments.emplace_back(css::beans::NamedValue("RepairPackage", css::uno::Any(true)));
    try
    {
        const css::uno::Reference<css::beans::XPropertySet> xRepaired(
            xContext->getServiceManager()->createInstanceWithArgumentsAndContext(
                "com.sun.star.packages.comp.ZipPackage",
                comphelper::containerToSequence(aArguments), xContext),
            css::uno::UNO_QUERY);
        if (!xRepaired.is())
            return false;

        // A repairable broken ZIP package opens successfully here and reports some content
        bool bHasElements;
        if (xRepaired->getPropertyValue("HasElements") >>= bHasElements)
            return bHasElements;
    }
    catch (const css::uno::Exception&)
    {
        // Repair attempt failed too: treated as not a repairable ZIP
    }

    return false;
}


OUString TypeDetection::impl_detectTypeFlatAndDeep(      utl::MediaDescriptor& rDescriptor   ,
                                                          const FlatDetection&                 lFlatTypes    ,
                                                                bool                       bAllowDeep    ,
@@ -841,6 +890,59 @@ OUString TypeDetection::impl_detectTypeFlatAndDeep(      utl::MediaDescriptor& r
    // a set and a not set value.
    rLastChance.clear();

    // tdf#96401: First of all, check if this is a broken ZIP package. Not doing this here would
    // make some filters silently not recognize their content in broken packages, and some filters
    // show a warning and mistakenly claim own content based on user choice.
    if (bAllowDeep && !rDescriptor.getUnpackedValueOrDefault("RepairPackage", false)
        && rDescriptor.getUnpackedValueOrDefault("RepairAllowed", true)
        && rDescriptor.contains(utl::MediaDescriptor::PROP_INTERACTIONHANDLER))
    {
        try
        {
            impl_openStream(rDescriptor);
            if (auto xStream = rDescriptor.getUnpackedValueOrDefault(
                    utl::MediaDescriptor::PROP_INPUTSTREAM,
                    css::uno::Reference<css::io::XInputStream>()))
            {
                css::uno::Reference<css::uno::XComponentContext> xContext;

                // SAFE ->
                {
                    std::unique_lock aLock(m_aMutex);
                    xContext = m_xContext;
                }
                // <- SAFE

                if (isBrokenZIP(xStream, xContext))
                {
                    if (css::uno::Reference<css::task::XInteractionHandler> xInteraction{
                            rDescriptor.getValue(utl::MediaDescriptor::PROP_INTERACTIONHANDLER),
                            css::uno::UNO_QUERY })
                    {
                        INetURLObject aURL(rDescriptor.getUnpackedValueOrDefault(
                            utl::MediaDescriptor::PROP_URL, OUString()));
                        OUString aDocumentTitle
                            = aURL.getName(INetURLObject::LAST_SEGMENT, true,
                                           INetURLObject::DecodeMechanism::WithCharset);

                        // Ask the user whether they want to try to repair
                        RequestPackageReparation aRequest(aDocumentTitle);
                        xInteraction->handle(aRequest.GetRequest());

                        if (aRequest.isApproved())
                            rDescriptor["RepairPackage"] <<= true;
                        else
                            rDescriptor["RepairAllowed"] <<= false; // Do not ask again
                    }
                }
            }
        }
        catch (const css::uno::Exception&)
        {
            // No problem
        }
    }

    // step over all possible types for this URL.
    // solutions:
    // a) no types                                => no detection
diff --git a/include/oox/helper/zipstorage.hxx b/include/oox/helper/zipstorage.hxx
index dec4b48..dabb714 100644
--- a/include/oox/helper/zipstorage.hxx
+++ b/include/oox/helper/zipstorage.hxx
@@ -43,7 +43,8 @@ class ZipStorage final : public StorageBase
public:
    explicit            ZipStorage(
                            const css::uno::Reference< css::uno::XComponentContext >& rxContext,
                            const css::uno::Reference< css::io::XInputStream >& rxInStream );
                            const css::uno::Reference< css::io::XInputStream >& rxInStream,
                            bool bRepairStorage = false );

    explicit            ZipStorage(
                            const css::uno::Reference< css::uno::XComponentContext >& rxContext,
diff --git a/oox/source/core/filterdetect.cxx b/oox/source/core/filterdetect.cxx
index 7c52f81..cbee8cf 100644
--- a/oox/source/core/filterdetect.cxx
+++ b/oox/source/core/filterdetect.cxx
@@ -428,7 +428,8 @@ OUString SAL_CALL FilterDetect::detect( Sequence< PropertyValue >& rMediaDescSeq
        Reference< XInputStream > xInputStream( extractUnencryptedPackage( aMediaDescriptor ), UNO_SET_THROW );

        // stream must be a ZIP package
        ZipStorage aZipStorage( mxContext, xInputStream );
        ZipStorage aZipStorage(mxContext, xInputStream,
                               aMediaDescriptor.getUnpackedValueOrDefault("RepairPackage", false));
        if( aZipStorage.isStorage() )
        {
            // create the fast parser, register the XML namespaces, set document handler
diff --git a/oox/source/helper/zipstorage.cxx b/oox/source/helper/zipstorage.cxx
index 10f7d79..db73b14 100644
--- a/oox/source/helper/zipstorage.cxx
+++ b/oox/source/helper/zipstorage.cxx
@@ -38,7 +38,7 @@ using namespace ::com::sun::star::io;
using namespace ::com::sun::star::lang;
using namespace ::com::sun::star::uno;

ZipStorage::ZipStorage( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStream ) :
ZipStorage::ZipStorage( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStream, bool bRepairStorage ) :
    StorageBase( rxInStream, false )
{
    OSL_ENSURE( rxContext.is(), "ZipStorage::ZipStorage - missing component context" );
@@ -61,7 +61,7 @@ ZipStorage::ZipStorage( const Reference< XComponentContext >& rxContext, const R
            implementation of relations handling.
         */
        mxStorage = ::comphelper::OStorageHelper::GetStorageOfFormatFromInputStream(
            ZIP_STORAGE_FORMAT_STRING, rxInStream, rxContext, false);
            ZIP_STORAGE_FORMAT_STRING, rxInStream, rxContext, bRepairStorage);
    }
    catch (Exception const&)
    {
diff --git a/package/source/zippackage/ZipPackage.cxx b/package/source/zippackage/ZipPackage.cxx
index 39a8e17..4dc2021 100644
--- a/package/source/zippackage/ZipPackage.cxx
+++ b/package/source/zippackage/ZipPackage.cxx
@@ -1801,6 +1801,10 @@ Any SAL_CALL ZipPackage::getPropertyValue( const OUString& PropertyName )
    {
        return Any(m_bMediaTypeFallbackUsed);
    }
    else if (PropertyName == "HasElements")
    {
        return Any(m_pZipFile && m_pZipFile->entries().hasMoreElements());
    }
    throw UnknownPropertyException(PropertyName);
}
void SAL_CALL ZipPackage::addPropertyChangeListener( const OUString& /*aPropertyName*/, const uno::Reference< XPropertyChangeListener >& /*xListener*/ )