tdf#158442: fix opening hybrid PDFs on Windows
Commit 046e954595
(Try to revert to use
of file_iterator from boost on Windows, 2023-10-31) had introduced a
problem that pdfparse::PDFReader::read couldn't create file_iterator
for files already opened with write access: mmap_file_iterator ctor
on Windows used single FILE_SHARE_READ as dwSharedMode parameter for
CreateFileA WinAPI; and that failed, when the file was already opened
using GENERIC_WRITE in dwDesiredAccess - which happens when opening
stream in TypeDetection::impl_detectTypeFlatAndDeep.
Fix this by patching boosts' mmap_file_iterator constructor to use
FILE_SHARE_READ | FILE_SHARE_WRITE, like we do in osl_openFile.
But there was a pre-existing problem of using char-based CreateFileA
API, which disallows opening any files with names not representable
in current Windows codepage. Such hybrid PDF files would still fail
creation of the file_iterator, and open as PDF.
Fix that by further patching boost to have wstring-based constructors
for file_iterator and mmap_file_iterator on Windows, which would call
CreateFileW.
Change-Id: Ib190bc090636159ade390b3dd120957d06d7b89b
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/160218
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
This commit is contained in:
parent
8a0015c35f
commit
a5a49657dc
13 changed files with 216 additions and 17 deletions
2
external/boost/UnpackedTarball_boost.mk
vendored
2
external/boost/UnpackedTarball_boost.mk
vendored
|
@ -35,6 +35,8 @@ boost_patches += boost-ios.patch.0
|
|||
# violations":
|
||||
boost_patches += 0001-Avoid-boost-phoenix-placeholders-uarg1.10-ODR-violat.patch.2
|
||||
|
||||
boost_patches += boost.file_iterator.sharing_win.patch
|
||||
|
||||
$(eval $(call gb_UnpackedTarball_UnpackedTarball,boost))
|
||||
|
||||
$(eval $(call gb_UnpackedTarball_set_tarball,boost,$(BOOST_TARBALL)))
|
||||
|
|
158
external/boost/boost.file_iterator.sharing_win.patch
vendored
Normal file
158
external/boost/boost.file_iterator.sharing_win.patch
vendored
Normal file
|
@ -0,0 +1,158 @@
|
|||
--- foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp.orig
|
||||
+++ foo/misc/boost/boost/spirit/home/classic/iterator/impl/file_iterator.ipp
|
||||
@@ -181,67 +181,28 @@ public:
|
||||
{}
|
||||
|
||||
explicit mmap_file_iterator(std::string const& fileName)
|
||||
- : m_filesize(0), m_curChar(0)
|
||||
- {
|
||||
- HANDLE hFile = ::CreateFileA(
|
||||
+ : mmap_file_iterator(::CreateFileA(
|
||||
fileName.c_str(),
|
||||
GENERIC_READ,
|
||||
- FILE_SHARE_READ,
|
||||
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
|
||||
NULL,
|
||||
OPEN_EXISTING,
|
||||
FILE_FLAG_SEQUENTIAL_SCAN,
|
||||
NULL
|
||||
- );
|
||||
-
|
||||
- if (hFile == INVALID_HANDLE_VALUE)
|
||||
- return;
|
||||
-
|
||||
- // Store the size of the file, it's used to construct
|
||||
- // the end iterator
|
||||
- m_filesize = ::GetFileSize(hFile, NULL);
|
||||
+ ))
|
||||
+ {}
|
||||
|
||||
- HANDLE hMap = ::CreateFileMapping(
|
||||
- hFile,
|
||||
+ explicit mmap_file_iterator(std::wstring const& fileName)
|
||||
+ : mmap_file_iterator(::CreateFileW(
|
||||
+ fileName.c_str(),
|
||||
+ GENERIC_READ,
|
||||
+ FILE_SHARE_READ | FILE_SHARE_WRITE,
|
||||
NULL,
|
||||
- PAGE_READONLY,
|
||||
- 0, 0,
|
||||
+ OPEN_EXISTING,
|
||||
+ FILE_FLAG_SEQUENTIAL_SCAN,
|
||||
NULL
|
||||
- );
|
||||
-
|
||||
- if (hMap == NULL)
|
||||
- {
|
||||
- ::CloseHandle(hFile);
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- LPVOID pMem = ::MapViewOfFile(
|
||||
- hMap,
|
||||
- FILE_MAP_READ,
|
||||
- 0, 0, 0
|
||||
- );
|
||||
-
|
||||
- if (pMem == NULL)
|
||||
- {
|
||||
- ::CloseHandle(hMap);
|
||||
- ::CloseHandle(hFile);
|
||||
- return;
|
||||
- }
|
||||
-
|
||||
- // We hold both the file handle and the memory pointer.
|
||||
- // We can close the hMap handle now because Windows holds internally
|
||||
- // a reference to it since there is a view mapped.
|
||||
- ::CloseHandle(hMap);
|
||||
-
|
||||
- // It seems like we can close the file handle as well (because
|
||||
- // a reference is hold by the filemap object).
|
||||
- ::CloseHandle(hFile);
|
||||
-
|
||||
- // Store the handles inside the shared_ptr (with the custom destructors)
|
||||
- m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
|
||||
-
|
||||
- // Start of the file
|
||||
- m_curChar = m_mem.get();
|
||||
- }
|
||||
+ ))
|
||||
+ {}
|
||||
|
||||
mmap_file_iterator(const mmap_file_iterator& iter)
|
||||
{ *this = iter; }
|
||||
@@ -290,6 +251,59 @@ private:
|
||||
boost::shared_ptr<CharT> m_mem;
|
||||
std::size_t m_filesize;
|
||||
CharT* m_curChar;
|
||||
+
|
||||
+ explicit mmap_file_iterator(HANDLE hFile)
|
||||
+ : m_filesize(0), m_curChar(0)
|
||||
+ {
|
||||
+ if (hFile == INVALID_HANDLE_VALUE)
|
||||
+ return;
|
||||
+
|
||||
+ // Store the size of the file, it's used to construct
|
||||
+ // the end iterator
|
||||
+ m_filesize = ::GetFileSize(hFile, NULL);
|
||||
+
|
||||
+ HANDLE hMap = ::CreateFileMapping(
|
||||
+ hFile,
|
||||
+ NULL,
|
||||
+ PAGE_READONLY,
|
||||
+ 0, 0,
|
||||
+ NULL
|
||||
+ );
|
||||
+
|
||||
+ if (hMap == NULL)
|
||||
+ {
|
||||
+ ::CloseHandle(hFile);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ LPVOID pMem = ::MapViewOfFile(
|
||||
+ hMap,
|
||||
+ FILE_MAP_READ,
|
||||
+ 0, 0, 0
|
||||
+ );
|
||||
+
|
||||
+ if (pMem == NULL)
|
||||
+ {
|
||||
+ ::CloseHandle(hMap);
|
||||
+ ::CloseHandle(hFile);
|
||||
+ return;
|
||||
+ }
|
||||
+
|
||||
+ // We hold both the file handle and the memory pointer.
|
||||
+ // We can close the hMap handle now because Windows holds internally
|
||||
+ // a reference to it since there is a view mapped.
|
||||
+ ::CloseHandle(hMap);
|
||||
+
|
||||
+ // It seems like we can close the file handle as well (because
|
||||
+ // a reference is hold by the filemap object).
|
||||
+ ::CloseHandle(hFile);
|
||||
+
|
||||
+ // Store the handles inside the shared_ptr (with the custom destructors)
|
||||
+ m_mem.reset(static_cast<CharT*>(pMem), ::UnmapViewOfFile);
|
||||
+
|
||||
+ // Start of the file
|
||||
+ m_curChar = m_mem.get();
|
||||
+ }
|
||||
};
|
||||
|
||||
#endif // BOOST_SPIRIT_FILEITERATOR_WINDOWS
|
||||
--- foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp.orig
|
||||
+++ foo/misc/boost/boost/spirit/home/classic/iterator/file_iterator.hpp
|
||||
@@ -170,6 +170,12 @@ public:
|
||||
: base_t(adapted_t(fileName))
|
||||
{}
|
||||
|
||||
+#ifdef BOOST_SPIRIT_FILEITERATOR_WINDOWS
|
||||
+ file_iterator(std::wstring const& fileName)
|
||||
+ : base_t(adapted_t(fileName))
|
||||
+ {}
|
||||
+#endif
|
||||
+
|
||||
file_iterator(const base_t& iter)
|
||||
: base_t(iter)
|
||||
{}
|
|
@ -42,4 +42,8 @@ $(eval $(call gb_CppunitTest_use_rdb,filter_textfilterdetect,services))
|
|||
|
||||
$(eval $(call gb_CppunitTest_use_configuration,filter_textfilterdetect))
|
||||
|
||||
$(eval $(call gb_CppunitTest_use_custom_headers,filter_textfilterdetect, \
|
||||
officecfg/registry \
|
||||
))
|
||||
|
||||
# vim: set noet sw=4 ts=4:
|
||||
|
|
BIN
filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf
Normal file
BIN
filter/qa/unit/data/hybrid_calc_абв_αβγ.pdf
Normal file
Binary file not shown.
BIN
filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf
Normal file
BIN
filter/qa/unit/data/hybrid_impress_абв_αβγ.pdf
Normal file
Binary file not shown.
BIN
filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf
Normal file
BIN
filter/qa/unit/data/hybrid_writer_абв_αβγ.pdf
Normal file
Binary file not shown.
|
@ -16,7 +16,9 @@
|
|||
#include <com/sun/star/sheet/XSpreadsheetDocument.hpp>
|
||||
#include <com/sun/star/text/XTextDocument.hpp>
|
||||
|
||||
#include <comphelper/configuration.hxx>
|
||||
#include <comphelper/propertyvalue.hxx>
|
||||
#include <officecfg/Office/Common.hxx>
|
||||
#include <sfx2/docfac.hxx>
|
||||
#include <unotools/mediadescriptor.hxx>
|
||||
#include <unotools/streamwrap.hxx>
|
||||
|
@ -31,6 +33,11 @@ using namespace com::sun::star;
|
|||
|
||||
namespace
|
||||
{
|
||||
bool supportsService(const uno::Reference<lang::XComponent>& x, const OUString& s)
|
||||
{
|
||||
return uno::Reference<lang::XServiceInfo>(x, uno::UNO_QUERY_THROW)->supportsService(s);
|
||||
}
|
||||
|
||||
/// Test class for PlainTextFilterDetect.
|
||||
class TextFilterDetectTest : public UnoApiTest
|
||||
{
|
||||
|
@ -63,10 +70,6 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testTdf114428)
|
|||
|
||||
CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
|
||||
{
|
||||
auto supportsService = [](const uno::Reference<lang::XComponent>& x, const OUString& s) {
|
||||
return uno::Reference<lang::XServiceInfo>(x, uno::UNO_QUERY_THROW)->supportsService(s);
|
||||
};
|
||||
|
||||
// Given an empty file, with a pptx extension
|
||||
// When loading the file
|
||||
loadFromURL(u"empty.pptx");
|
||||
|
@ -172,6 +175,36 @@ CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testEmptyFile)
|
|||
CPPUNIT_ASSERT_EQUAL(u"Writer template’s first line"_ustr, xParagraph->getString());
|
||||
}
|
||||
}
|
||||
|
||||
CPPUNIT_TEST_FIXTURE(TextFilterDetectTest, testHybridPDFFile)
|
||||
{
|
||||
// Make sure that file locking is ON
|
||||
{
|
||||
std::shared_ptr<comphelper::ConfigurationChanges> xChanges(
|
||||
comphelper::ConfigurationChanges::create());
|
||||
officecfg::Office::Common::Misc::UseDocumentSystemFileLocking::set(true, xChanges);
|
||||
xChanges->commit();
|
||||
}
|
||||
|
||||
// Given a hybrid PDF file
|
||||
|
||||
// Created in Writer
|
||||
loadFromURL(u"hybrid_writer_абв_αβγ.pdf");
|
||||
// Make sure it opens in Writer.
|
||||
// Without the accompanying fix in place, this test would have failed on Windows, as it was
|
||||
// opened in Draw instead.
|
||||
CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.text.TextDocument"));
|
||||
|
||||
// Created in Calc
|
||||
loadFromURL(u"hybrid_calc_абв_αβγ.pdf");
|
||||
// Make sure it opens in Calc.
|
||||
CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.sheet.SpreadsheetDocument"));
|
||||
|
||||
// Created in Impress
|
||||
loadFromURL(u"hybrid_impress_абв_αβγ.pdf");
|
||||
// Make sure it opens in Impress.
|
||||
CPPUNIT_ASSERT(supportsService(mxComponent, "com.sun.star.presentation.PresentationDocument"));
|
||||
}
|
||||
}
|
||||
|
||||
CPPUNIT_PLUGIN_IMPLEMENT();
|
||||
|
|
|
@ -41,6 +41,7 @@ inline char16_t* toU(wchar_t* p) { return reinterpret_cast<char16_t*>(p); }
|
|||
inline char16_t const* toU(wchar_t const* p) { return reinterpret_cast<char16_t const*>(p); }
|
||||
|
||||
inline std::u16string_view toU(std::wstring_view v) { return { toU(v.data()), v.size() }; }
|
||||
inline std::wstring_view toW(std::u16string_view v) { return { toW(v.data()), v.size() }; }
|
||||
#endif
|
||||
}
|
||||
|
||||
|
|
|
@ -555,7 +555,6 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString&
|
|||
bool bMayUseUI )
|
||||
{
|
||||
uno::Reference< io::XStream > xEmbed;
|
||||
OString aPDFFile;
|
||||
OUString aSysUPath;
|
||||
if( osl_getSystemPathFromFileURL( rInPDFFileURL.pData, &aSysUPath.pData ) != osl_File_E_None )
|
||||
return xEmbed;
|
||||
|
@ -563,9 +562,7 @@ uno::Reference< io::XStream > getAdditionalStream( const OUString&
|
|||
if (!detectHasAdditionalStreams(aSysUPath))
|
||||
return xEmbed;
|
||||
|
||||
aPDFFile = OUStringToOString( aSysUPath, osl_getThreadTextEncoding() );
|
||||
|
||||
std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
|
||||
std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(aSysUPath));
|
||||
if( pEntry )
|
||||
{
|
||||
pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
|
||||
|
|
|
@ -292,10 +292,7 @@ struct PDFReader
|
|||
{
|
||||
PDFReader() = delete;
|
||||
|
||||
static std::unique_ptr<PDFEntry> read( const char* pFileName );
|
||||
#ifdef _WIN32
|
||||
static std::unique_ptr<PDFEntry> read( const char* pBuffer, unsigned int nLen );
|
||||
#endif
|
||||
static std::unique_ptr<PDFEntry> read(std::u16string_view aFileName);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
|
|
@ -36,7 +36,9 @@
|
|||
|
||||
#include <string.h>
|
||||
|
||||
#include <o3tl/char16_t2wchar_t.hxx>
|
||||
#include <o3tl/safeint.hxx>
|
||||
#include <osl/thread.h>
|
||||
#include <rtl/strbuf.hxx>
|
||||
#include <rtl/ustrbuf.hxx>
|
||||
#include <sal/log.hxx>
|
||||
|
@ -558,9 +560,14 @@ public:
|
|||
|
||||
}
|
||||
|
||||
std::unique_ptr<PDFEntry> PDFReader::read( const char* pFileName )
|
||||
std::unique_ptr<PDFEntry> PDFReader::read(std::u16string_view aFileName)
|
||||
{
|
||||
file_iterator<> file_start( pFileName );
|
||||
#ifdef _WIN32
|
||||
file_iterator<> file_start(std::wstring(o3tl::toW(aFileName)));
|
||||
#else
|
||||
file_iterator<> file_start(
|
||||
std::string(OUStringToOString(aFileName, osl_getThreadTextEncoding())));
|
||||
#endif
|
||||
if( ! file_start )
|
||||
return nullptr;
|
||||
file_iterator<> file_end = file_start.make_end();
|
||||
|
|
|
@ -224,7 +224,8 @@ typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
|
|||
static int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
|
||||
{
|
||||
int nRet = 0;
|
||||
std::unique_ptr<PDFEntry> pEntry = pdfparse::PDFReader::read( pInFile );
|
||||
std::unique_ptr<PDFEntry> pEntry
|
||||
= pdfparse::PDFReader::read(OStringToOUString(pInFile, osl_getThreadTextEncoding()));
|
||||
if( pEntry )
|
||||
{
|
||||
PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry.get());
|
||||
|
|
|
@ -911,9 +911,8 @@ static bool checkEncryption( std::u16string_view i_rPa
|
|||
)
|
||||
{
|
||||
bool bSuccess = false;
|
||||
OString aPDFFile = OUStringToOString( i_rPath, osl_getThreadTextEncoding() );
|
||||
|
||||
std::unique_ptr<pdfparse::PDFEntry> pEntry( pdfparse::PDFReader::read( aPDFFile.getStr() ));
|
||||
std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(i_rPath));
|
||||
if( pEntry )
|
||||
{
|
||||
pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
|
||||
|
|
Loading…
Reference in a new issue