tdf#158442: fix opening hybrid PDFs on Windows

Commit 046e954595 (Try to revert to use
of file_iterator from boost on Windows, 2023-10-31) had introduced a
problem that pdfparse::PDFReader::read couldn't create file_iterator
for files already opened with write access: mmap_file_iterator ctor
on Windows used single FILE_SHARE_READ as dwSharedMode parameter for
CreateFileA WinAPI; and that failed, when the file was already opened
using GENERIC_WRITE in dwDesiredAccess - which happens when opening
stream in TypeDetection::impl_detectTypeFlatAndDeep.

Fix this by patching boosts' mmap_file_iterator constructor to use
FILE_SHARE_READ | FILE_SHARE_WRITE, like we do in osl_openFile.

But there was a pre-existing problem of using char-based CreateFileA
API, which disallows opening any files with names not representable
in current Windows codepage. Such hybrid PDF files would still fail
creation of the file_iterator, and open as PDF.

Fix that by further patching boost to have wstring-based constructors
for file_iterator and mmap_file_iterator on Windows, which would call
CreateFileW.

Change-Id: Ib190bc090636159ade390b3dd120957d06d7b89b
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160218
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
This commit is contained in:
Mike Kaganski 2023-12-01 16:48:44 +03:00
parent 8a0015c35f
commit a5a49657dc
13 changed files with 216 additions and 17 deletions

View file

@ -35,6 +35,8 @@ boost_patches += boost-ios.patch.0
# violations":
boost_patches += 0001-Avoid-boost-phoenix-placeholders-uarg1.10-ODR-violat.patch.2
boost_patches += boost.file_iterator.sharing_win.patch
$(eval $(call gb_UnpackedTarball_UnpackedTarball,boost))
$(eval $(call gb_UnpackedTarball_set_tarball,boost,$(BOOST_TARBALL)))

View file

@ -0,0 +1,158 @@
--- foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp.orig
+++ foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp
@@ -181,67 +181,28 @@ public:
{}
explicit mmap_file_iterator(std::string const& fileName)
- : m_filesize(0), m_curChar(0)
- {
- HANDLE hFile = ::CreateFileA(
+ : mmap_file_iterator(::CreateFileA(
fileName.c_str(),
GENERIC_READ,
- FILE_SHARE_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL,
OPEN_EXISTING,
FILE_FLAG_SEQUENTIAL_SCAN,
NULL
- );
-
- if (hFile == INVALID_HANDLE_VALUE)
- return;
-
- // Store the size of the file, it's used to construct
- // the end iterator
- m_filesize = ::GetFileSize(hFile, NULL);
+ ))
+ {}
- HANDLE hMap = ::CreateFileMapping(
- hFile,
+ explicit mmap_file_iterator(std::wstring const& fileName)
+ : mmap_file_iterator(::CreateFileW(
+ fileName.c_str(),
+ GENERIC_READ,
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
NULL,
- PAGE_READONLY,
- 0, 0,
+ OPEN_EXISTING,
+ FILE_FLAG_SEQUENTIAL_SCAN,
NULL
- );
-
- if (hMap == NULL)
- {
- ::CloseHandle(hFile);
- return;
- }
-
- LPVOID pMem = ::MapViewOfFile(
- hMap,
- FILE_MAP_READ,
- 0, 0, 0
- );
-
- if (pMem == NULL)
- {
- ::CloseHandle(hMap);
- ::CloseHandle(hFile);
- return;
- }
-
- // We hold both the file handle and the memory pointer.
- // We can close the hMap handle now because Windows holds internally
- // a reference to it since there is a view mapped.
- ::CloseHandle(hMap);
-
- // It seems like we can close the file handle as well (because
- // a reference is hold by the filemap object).
- ::CloseHandle(hFile);
-
- // Store the handles inside the shared_ptr (with the custom destructors)
- m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
-
- // Start of the file
- m_curChar = m_mem.get();
- }
+ ))
+ {}
mmap_file_iterator(const mmap_file_iterator& iter)
{ *this = iter; }
@@ -290,6 +251,59 @@ private:
boost::shared_ptr<CharT> m_mem;
std::size_t m_filesize;
CharT* m_curChar;
+
+ explicit mmap_file_iterator(HANDLE hFile)
+ : m_filesize(0), m_curChar(0)
+ {
+ if (hFile == INVALID_HANDLE_VALUE)
+ return;
+
+ // Store the size of the file, it's used to construct
+ // the end iterator
+ m_filesize = ::GetFileSize(hFile, NULL);
+
+ HANDLE hMap = ::CreateFileMapping(
+ hFile,
+ NULL,
+ PAGE_READONLY,
+ 0, 0,
+ NULL
+ );
+
+ if (hMap == NULL)
+ {
+ ::CloseHandle(hFile);
+ return;
+ }
+
+ LPVOID pMem = ::MapViewOfFile(
+ hMap,
+ FILE_MAP_READ,
+ 0, 0, 0
+ );
+
+ if (pMem == NULL)
+ {
+ ::CloseHandle(hMap);
+ ::CloseHandle(hFile);
+ return;
+ }
+
+ // We hold both the file handle and the memory pointer.
+ // We can close the hMap handle now because Windows holds internally
+ // a reference to it since there is a view mapped.
+ ::CloseHandle(hMap);
+
+ // It seems like we can close the file handle as well (because
+ // a reference is hold by the filemap object).
+ ::CloseHandle(hFile);
+
+ // Store the handles inside the shared_ptr (with the custom destructors)
+ m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
+
+ // Start of the file
+ m_curChar = m_mem.get();
+ }
};
#endif // BOOST_SPIRIT_FILEITERATOR_WINDOWS
--- foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp.orig
+++ foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp
@@ -170,6 +170,12 @@ public:
: base_t(adapted_t(fileName))
{}
+#ifdef BOOST_SPIRIT_FILEITERATOR_WINDOWS
+ file_iterator(std::wstring const& fileName)
+ : base_t(adapted_t(fileName))
+ {}
+#endif
+
file_iterator(const base_t& iter)
: base_t(iter)
{}

View file

@ -42,4 +42,8 @@ $(eval $(call gb_CppunitTest_use_rdb,filter_textfilterdetect,services))
$(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
$(eval $(call gb_CppunitTest_use_custom_headers,filter_textfilterdetect, \
officecfg/registry \
))
# vim: set noet sw=4 ts=4:

Binary file not shown.

Binary file not shown.

Binary file not shown.

View file

@ -16,7 +16,9 @@
#include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
#include <com/sun/star/text/XTextDocument.hpp>
#include <comphelper/configuration.hxx>
#include <comphelper/propertyvalue.hxx>
#include <officecfg/Office/Common.hxx>
#include <sfx2/docfac.hxx>
#include <unotools/mediadescriptor.hxx>
#include <unotools/streamwrap.hxx>
@ -31,6 +33,11 @@ using namespace com::sun::star;
namespace
{
bool supportsService(const uno::Reference<lang::XComponent>& x, const OUString& s)
{
return uno::Reference<lang::XServiceInfo>(x, uno::UNO_QUERY_THROW)->supportsService(s);
}
/// Test class for PlainTextFilterDetect.
class TextFilterDetectTest : public UnoApiTest
{
@ -63,10 +70,6 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
{
auto supportsService = [](const uno::Reference<lang::XComponent>& x, const OUString& s) {
return uno::Reference<lang::XServiceInfo>(x, uno::UNO_QUERY_THROW)->supportsService(s);
};
// Given an empty file, with a pptx extension
// When loading the file
loadFromURL(u"empty.pptx");
@ -172,6 +175,36 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
CPPUNIT_ASSERT_EQUAL(u"Writer templates first line"_ustr, xParagraph->getString());
}
}
CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testHybridPDFFile)
{
// Make sure that file locking is ON
{
std::shared_ptr<comphelper::ConfigurationChanges> xChanges(
comphelper::ConfigurationChanges::create());
officecfg::Office::Common::Misc::UseDocumentSystemFileLocking::set(true, xChanges);
xChanges->commit();
}
// Given a hybrid PDF file
// Created in Writer
loadFromURL(u"hybrid_writer_абв_αβγ.pdf");
// Make sure it opens in Writer.
// Without the accompanying fix in place, this test would have failed on Windows, as it was
// opened in Draw instead.
CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.text.TextDocument"));
// Created in Calc
loadFromURL(u"hybrid_calc_абв_αβγ.pdf");
// Make sure it opens in Calc.
CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.sheet.SpreadsheetDocument"));
// Created in Impress
loadFromURL(u"hybrid_impress_абв_αβγ.pdf");
// Make sure it opens in Impress.
CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.presentation.PresentationDocument"));
}
}
CPPUNIT_PLUGIN_IMPLEMENT();

View file

@ -41,6 +41,7 @@ inline char16_t* toU(wchar_t* p) { return reinterpret_cast<char16_t*>(p); }
inline char16_t const* toU(wchar_t const* p) { return reinterpret_cast<char16_t const*>(p); }
inline std::u16string_view toU(std::wstring_view v) { return { toU(v.data()), v.size() }; }
inline std::wstring_view toW(std::u16string_view v) { return { toW(v.data()), v.size() }; }
#endif
}

View file

@ -555,7 +555,6 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString&
bool bMayUseUI )
{
uno::Reference< io::XStream > xEmbed;
OString aPDFFile;
OUString aSysUPath;
if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) != osl_File_E_None )
return xEmbed;
@ -563,9 +562,7 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString&
if (!detectHasAdditionalStreams(aSysUPath))
return xEmbed;
aPDFFile = OUStringToOString( aSysUPath, osl_getThreadTextEncoding() );
std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(aSysUPath));
if( pEntry )
{
pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());

View file

@ -292,10 +292,7 @@ struct PDFReader
{
PDFReader() = delete;
static std::unique_ptr<PDFEntry> read( const char* pFileName );
#ifdef _WIN32
static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int nLen );
#endif
static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName);
};
} // namespace

View file

@ -36,7 +36,9 @@
#include <string.h>
#include <o3tl/char16_t2wchar_t.hxx>
#include <o3tl/safeint.hxx>
#include <osl/thread.h>
#include <rtl/strbuf.hxx>
#include <rtl/ustrbuf.hxx>
#include <sal/log.hxx>
@ -558,9 +560,14 @@ public:
}
std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
std::unique_ptr<PDFEntry> PDFReader::read(std::u16string_view aFileName)
{
file_iterator<> file_start( pFileName );
#ifdef _WIN32
file_iterator<> file_start(std::wstring(o3tl::toW(aFileName)));
#else
file_iterator<> file_start(
std::string(OUStringToOString(aFileName, osl_getThreadTextEncoding())));
#endif
if( ! file_start )
return nullptr;
file_iterator<> file_end = file_start.make_end();

View file

@ -224,7 +224,8 @@ typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
static int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
{
int nRet = 0;
std::unique_ptr<PDFEntry> pEntry = pdfparse::PDFReader::read( pInFile );
std::unique_ptr<PDFEntry> pEntry
= pdfparse::PDFReader::read(OStringToOUString(pInFile, osl_getThreadTextEncoding()));
if( pEntry )
{
PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry.get());

View file

@ -911,9 +911,8 @@ static bool checkEncryption( std::u16string_view i_rPa
)
{
bool bSuccess = false;
OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() );
std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(i_rPath));
if( pEntry )
{
pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());