From 86c682273d907c77404637c89e584047de1c1099 Mon Sep 17 00:00:00 2001 From: Mike Kaganski Date: Sun, 26 Nov 2023 20:18:34 +0300 Subject: [PATCH] tdf#96401: allow to detect a broken ZIP package In deep detection, first check if it's a broken ZIP package. If it is, set the RepairPackage media descriptor property to true. Pass the RepairPackage value to the OOXML filter detection. Change-Id: Ic958283f3cce92ac29ce93ac330cc9e409e3eb78 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/159976 Tested-by: Jenkins Reviewed-by: Mike Kaganski --- filter/Library_filterconfig.mk | 1 + filter/source/config/cache/typedetection.cxx | 102 +++++++++++++++++++ include/oox/helper/zipstorage.hxx | 3 +- oox/source/core/filterdetect.cxx | 3 +- oox/source/helper/zipstorage.cxx | 4 +- package/source/zippackage/ZipPackage.cxx | 4 + 6 files changed, 113 insertions(+), 4 deletions(-) diff --git a/filter/Library_filterconfig.mk b/filter/Library_filterconfig.mk index 38251aa313f0..7d9318781eb3 100644 --- a/filter/Library_filterconfig.mk +++ b/filter/Library_filterconfig.mk @@ -41,6 +41,7 @@ $(eval $(call gb_Library_use_libraries,filterconfig,\ cppu \ sal \ salhelper \ + sfx \ i18nlangtag \ )) diff --git a/filter/source/config/cache/typedetection.cxx b/filter/source/config/cache/typedetection.cxx index a83a1406d0c1..5bc4f98b82f5 100644 --- a/filter/source/config/cache/typedetection.cxx +++ b/filter/source/config/cache/typedetection.cxx @@ -25,9 +25,14 @@ #include #include +#include +#include #include #include +#include #include + +#include #include #include #include @@ -832,6 +837,50 @@ void TypeDetection::impl_getAllFormatTypes( } +static bool isBrokenZIP(const css::uno::Reference& xStream, + const css::uno::Reference& xContext) +{ + std::vector aArguments{ + css::uno::Any(xStream), + css::uno::Any(css::beans::NamedValue("AllowRemoveOnInsert", css::uno::Any(false))), + css::uno::Any(css::beans::NamedValue("StorageFormat", + css::uno::Any(css::embed::StorageFormats::ZIP))), + }; + try + { + 
// If this is a broken ZIP package, or not a ZIP, this would throw ZipIOException + xContext->getServiceManager()->createInstanceWithArgumentsAndContext( + "com.sun.star.packages.comp.ZipPackage", comphelper::containerToSequence(aArguments), + xContext); + } + catch (const css::packages::zip::ZipIOException&) + { + // Now test if repair will succeed + aArguments.emplace_back(css::beans::NamedValue("RepairPackage", css::uno::Any(true))); + try + { + // If this is a broken ZIP package that can be repaired, this would succeed, + // and the result will not be empty + if (css::uno::Reference xPackage{ + xContext->getServiceManager()->createInstanceWithArgumentsAndContext( + "com.sun.star.packages.comp.ZipPackage", + comphelper::containerToSequence(aArguments), xContext), + css::uno::UNO_QUERY }) + if (bool bHasElements; xPackage->getPropertyValue("HasElements") >>= bHasElements) + return bHasElements; + } + catch (const css::uno::Exception&) + { + } + } + catch (const css::uno::Exception&) + { + } + // The package is either not broken, or is not a repairable ZIP + return false; +} + + OUString TypeDetection::impl_detectTypeFlatAndDeep( utl::MediaDescriptor& rDescriptor , const FlatDetection& lFlatTypes , bool bAllowDeep , @@ -841,6 +890,59 @@ OUString TypeDetection::impl_detectTypeFlatAndDeep( utl::MediaDescriptor& r // a set and a not set value. rLastChance.clear(); + // tdf#96401: First of all, check if this is a broken ZIP package. Not doing this here would + // make some filters silently not recognize their content in broken packages, and some filters + // show a warning and mistakenly claim own content based on user choice. 
+ if (bAllowDeep && !rDescriptor.getUnpackedValueOrDefault("RepairPackage", false) + && rDescriptor.getUnpackedValueOrDefault("RepairAllowed", true) + && rDescriptor.contains(utl::MediaDescriptor::PROP_INTERACTIONHANDLER)) + { + try + { + impl_openStream(rDescriptor); + if (auto xStream = rDescriptor.getUnpackedValueOrDefault( + utl::MediaDescriptor::PROP_INPUTSTREAM, + css::uno::Reference())) + { + css::uno::Reference xContext; + + // SAFE -> + { + std::unique_lock aLock(m_aMutex); + xContext = m_xContext; + } + // <- SAFE + + if (isBrokenZIP(xStream, xContext)) + { + if (css::uno::Reference xInteraction{ + rDescriptor.getValue(utl::MediaDescriptor::PROP_INTERACTIONHANDLER), + css::uno::UNO_QUERY }) + { + INetURLObject aURL(rDescriptor.getUnpackedValueOrDefault( + utl::MediaDescriptor::PROP_URL, OUString())); + OUString aDocumentTitle + = aURL.getName(INetURLObject::LAST_SEGMENT, true, + INetURLObject::DecodeMechanism::WithCharset); + + // Ask the user whether they want to try to repair + RequestPackageReparation aRequest(aDocumentTitle); + xInteraction->handle(aRequest.GetRequest()); + + if (aRequest.isApproved()) + rDescriptor["RepairPackage"] <<= true; + else + rDescriptor["RepairAllowed"] <<= false; // Do not ask again + } + } + } + } + catch (const css::uno::Exception&) + { + // No problem + } + } + // step over all possible types for this URL. 
// solutions: // a) no types => no detection diff --git a/include/oox/helper/zipstorage.hxx b/include/oox/helper/zipstorage.hxx index dec4b483ea3f..dabb714d7db8 100644 --- a/include/oox/helper/zipstorage.hxx +++ b/include/oox/helper/zipstorage.hxx @@ -43,7 +43,8 @@ class ZipStorage final : public StorageBase public: explicit ZipStorage( const css::uno::Reference< css::uno::XComponentContext >& rxContext, - const css::uno::Reference< css::io::XInputStream >& rxInStream ); + const css::uno::Reference< css::io::XInputStream >& rxInStream, + bool bRepairStorage = false ); explicit ZipStorage( const css::uno::Reference< css::uno::XComponentContext >& rxContext, diff --git a/oox/source/core/filterdetect.cxx b/oox/source/core/filterdetect.cxx index 7c52f81e615d..cbee8cf1d933 100644 --- a/oox/source/core/filterdetect.cxx +++ b/oox/source/core/filterdetect.cxx @@ -428,7 +428,8 @@ OUString SAL_CALL FilterDetect::detect( Sequence< PropertyValue >& rMediaDescSeq Reference< XInputStream > xInputStream( extractUnencryptedPackage( aMediaDescriptor ), UNO_SET_THROW ); // stream must be a ZIP package - ZipStorage aZipStorage( mxContext, xInputStream ); + ZipStorage aZipStorage(mxContext, xInputStream, + aMediaDescriptor.getUnpackedValueOrDefault("RepairPackage", false)); if( aZipStorage.isStorage() ) { // create the fast parser, register the XML namespaces, set document handler diff --git a/oox/source/helper/zipstorage.cxx b/oox/source/helper/zipstorage.cxx index 10f7d79c25f1..db73b14bdd6c 100644 --- a/oox/source/helper/zipstorage.cxx +++ b/oox/source/helper/zipstorage.cxx @@ -38,7 +38,7 @@ using namespace ::com::sun::star::io; using namespace ::com::sun::star::lang; using namespace ::com::sun::star::uno; -ZipStorage::ZipStorage( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStream ) : +ZipStorage::ZipStorage( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStream, bool bRepairStorage ) : StorageBase( 
rxInStream, false ) { OSL_ENSURE( rxContext.is(), "ZipStorage::ZipStorage - missing component context" ); @@ -61,7 +61,7 @@ ZipStorage::ZipStorage( const Reference< XComponentContext >& rxContext, const R implementation of relations handling. */ mxStorage = ::comphelper::OStorageHelper::GetStorageOfFormatFromInputStream( - ZIP_STORAGE_FORMAT_STRING, rxInStream, rxContext, false); + ZIP_STORAGE_FORMAT_STRING, rxInStream, rxContext, bRepairStorage); } catch (Exception const&) { diff --git a/package/source/zippackage/ZipPackage.cxx b/package/source/zippackage/ZipPackage.cxx index 39a8e17fbdd7..4dc2021a1904 100644 --- a/package/source/zippackage/ZipPackage.cxx +++ b/package/source/zippackage/ZipPackage.cxx @@ -1801,6 +1801,10 @@ Any SAL_CALL ZipPackage::getPropertyValue( const OUString& PropertyName ) { return Any(m_bMediaTypeFallbackUsed); } + else if (PropertyName == "HasElements") + { + return Any(m_pZipFile && m_pZipFile->entries().hasMoreElements()); + } throw UnknownPropertyException(PropertyName); } void SAL_CALL ZipPackage::addPropertyChangeListener( const OUString& /*aPropertyName*/, const uno::Reference< XPropertyChangeListener >& /*xListener*/ )