56abb1c760
/usr/bin/../lib64/gcc/x86_64-suse-linux/12/../../../../include/c++/12/bits/stl_vector.h:1124:9: runtime error: reference binding to null pointer of type 'char' #0 0x557ce8c06055 in std::vector<char, std::allocator<char>>::operator[](unsigned long) /usr/bin/../lib64/gcc/x86_64-suse-linux/12/../../../../include/c++/12/bits/stl_vector.h:1124:2 #1 0x557ce8f16e3f in FileUtil::readFile(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char>> const&, int) /home/vmiklos/git/collaboraonline/online-23.05-san/test/../common/FileUtil.cpp:449:37 While running toplevel 'make'. Both 'st.st_size' and 'off' were 0, so this is an edge case. Signed-off-by: Miklos Vajna <vmiklos@collabora.com> Change-Id: I4899bffa48a761c84b6ee7d42776453b641ff5a7
645 lines
19 KiB
C++
645 lines
19 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
|
|
/*
|
|
* Copyright the Collabora Online contributors.
|
|
*
|
|
* SPDX-License-Identifier: MPL-2.0
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*/
|
|
|
|
#include <config.h>
|
|
|
|
#include "FileUtil.hpp"
|
|
|
|
#include <dirent.h>
|
|
#include <exception>
|
|
#include <ftw.h>
|
|
#include <stdexcept>
|
|
#include <sys/time.h>
|
|
#ifdef __linux__
|
|
#include <sys/vfs.h>
|
|
#elif defined IOS
|
|
#import <Foundation/Foundation.h>
|
|
#elif defined __FreeBSD__
|
|
#include <sys/param.h>
|
|
#include <sys/mount.h>
|
|
#endif
|
|
|
|
#include <fcntl.h>
|
|
#include <chrono>
|
|
#include <cstdio>
|
|
#include <cstdlib>
|
|
#include <cstring>
|
|
#include <fstream>
|
|
#include <mutex>
|
|
#include <string>
|
|
|
|
#if HAVE_STD_FILESYSTEM
|
|
# if HAVE_STD_FILESYSTEM_EXPERIMENTAL
|
|
# include <experimental/filesystem>
|
|
namespace filesystem = ::std::experimental::filesystem;
|
|
# else
|
|
# include <filesystem>
|
|
namespace filesystem = ::std::filesystem;
|
|
# endif
|
|
#else
|
|
# include <Poco/TemporaryFile.h>
|
|
#endif
|
|
|
|
#include <Poco/File.h>
|
|
#include <Poco/Path.h>
|
|
|
|
#include "Log.hpp"
|
|
#include "Util.hpp"
|
|
#include "Unit.hpp"
|
|
|
|
namespace
{
#if HAVE_STD_FILESYSTEM
/// Collects file paths and removes those files when the instance is destroyed.
/// Used as a function-local static, so the removal happens when the process ends.
class FileDeleter
{
    /// Paths queued for removal; every access is guarded by _lock.
    std::vector<std::string> _filesToDelete;
    std::mutex _lock;
public:
    FileDeleter() = default;

    /// Removes every registered file on a best-effort basis.
    ~FileDeleter()
    {
        std::lock_guard<std::mutex> guard(_lock);
        for (const std::string& file: _filesToDelete)
        {
            // Use the non-throwing overload: an exception escaping a destructor
            // terminates the process, and one failed removal must not prevent
            // the remaining files from being cleaned up.
            std::error_code ec;
            filesystem::remove(file, ec);
        }
    }

    /// Queues a file for removal at destruction time.
    void registerForDeletion(const std::string& file)
    {
        std::lock_guard<std::mutex> guard(_lock);
        _filesToDelete.push_back(file);
    }
};
#endif
}
|
|
|
|
namespace FileUtil
|
|
{
|
|
std::string createRandomDir(const std::string& path)
|
|
{
|
|
std::string name = Util::rng::getFilename(64);
|
|
#if HAVE_STD_FILESYSTEM
|
|
filesystem::create_directory(path + '/' + name);
|
|
#else
|
|
Poco::File(Poco::Path(path, name)).createDirectories();
|
|
#endif
|
|
return name;
|
|
}
|
|
|
|
bool copy(const std::string& fromPath, const std::string& toPath, bool log, bool throw_on_error)
|
|
{
|
|
int from = -1, to = -1;
|
|
try
|
|
{
|
|
from = open(fromPath.c_str(), O_RDONLY);
|
|
if (from < 0)
|
|
throw std::runtime_error("Failed to open src " + anonymizeUrl(fromPath));
|
|
|
|
struct stat st;
|
|
if (fstat(from, &st) != 0)
|
|
throw std::runtime_error("Failed to fstat src " + anonymizeUrl(fromPath));
|
|
|
|
to = open(toPath.c_str(), O_CREAT | O_TRUNC | O_WRONLY, st.st_mode);
|
|
if (to < 0)
|
|
throw std::runtime_error("Failed to open dest " + anonymizeUrl(toPath));
|
|
|
|
// Logging may be redundant and/or noisy.
|
|
if (log)
|
|
LOG_INF("Copying " << st.st_size << " bytes from " << anonymizeUrl(fromPath)
|
|
<< " to " << anonymizeUrl(toPath));
|
|
|
|
char buffer[64 * 1024];
|
|
|
|
int n;
|
|
off_t bytesIn = 0;
|
|
do
|
|
{
|
|
while ((n = ::read(from, buffer, sizeof(buffer))) < 0 && errno == EINTR)
|
|
LOG_TRC("EINTR reading from " << anonymizeUrl(fromPath));
|
|
if (n < 0)
|
|
throw std::runtime_error("Failed to read from " + anonymizeUrl(fromPath)
|
|
+ " at " + std::to_string(bytesIn) + " bytes in");
|
|
|
|
bytesIn += n;
|
|
if (n == 0) // EOF
|
|
break;
|
|
assert (off_t(sizeof (buffer)) >= n);
|
|
// Handle short writes and EINTR
|
|
for (int j = 0; j < n;)
|
|
{
|
|
int written;
|
|
while ((written = ::write(to, buffer + j, n - j)) < 0 && errno == EINTR)
|
|
LOG_TRC("EINTR writing to " << anonymizeUrl(toPath));
|
|
if (written < 0)
|
|
{
|
|
throw std::runtime_error("Failed to write " + std::to_string(n)
|
|
+ " bytes to " + anonymizeUrl(toPath) + " at "
|
|
+ std::to_string(bytesIn) + " bytes into "
|
|
+ anonymizeUrl(fromPath));
|
|
}
|
|
j += written;
|
|
}
|
|
} while (true);
|
|
if (bytesIn != st.st_size)
|
|
{
|
|
LOG_WRN("Unusual: file " << anonymizeUrl(fromPath) << " changed size "
|
|
"during copy from " << st.st_size << " to " << bytesIn);
|
|
}
|
|
close(from);
|
|
close(to);
|
|
return true;
|
|
}
|
|
catch (const std::exception& ex)
|
|
{
|
|
std::ostringstream oss;
|
|
oss << "Error while copying from " << anonymizeUrl(fromPath) << " to "
|
|
<< anonymizeUrl(toPath) << ": " << ex.what();
|
|
const std::string err = oss.str();
|
|
LOG_ERR(err);
|
|
close(from);
|
|
close(to);
|
|
unlink(toPath.c_str());
|
|
if (throw_on_error)
|
|
throw std::runtime_error(err);
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Returns the path of the system temporary directory.
/// Falls back to the first non-empty of $TMPDIR, $TEMP and $TMP, and finally to
/// "/tmp", when the library cannot provide a usable path.
std::string getSysTempDirectoryPath()
{
    // Don't const to allow for automatic move on return.
#if HAVE_STD_FILESYSTEM
    // The throwing overload would raise filesystem_error on failure,
    // so the fallback below could never be reached.
    std::error_code ec;
    std::string path = filesystem::temp_directory_path(ec).string();
    if (ec)
        path.clear();
#else
    std::string path = Poco::Path::temp();
#endif

    if (!path.empty())
        return path;

    // Sensible fallback, though shouldn't be needed.
    for (const char* name : { "TMPDIR", "TEMP", "TMP" })
    {
        const char* value = getenv(name);
        // An empty value is as useless as an unset one.
        if (value && *value)
            return value;
    }

    return "/tmp";
}
|
|
|
|
std::string createRandomTmpDir(std::string root)
|
|
{
|
|
if (root.empty())
|
|
root = getSysTempDirectoryPath();
|
|
|
|
Poco::File(root).createDirectories();
|
|
|
|
// Don't const to allow for automatic move on return.
|
|
std::string newTmp = root + "/cool-" + Util::rng::getFilename(16);
|
|
if (::mkdir(newTmp.c_str(), S_IRWXU) < 0)
|
|
{
|
|
LOG_SYS("Failed to create random temp directory [" << newTmp << ']');
|
|
return root;
|
|
}
|
|
return newTmp;
|
|
}
|
|
|
|
std::string createTmpDir(std::string dirName, std::string root)
|
|
{
|
|
if (root.empty())
|
|
root = getSysTempDirectoryPath();
|
|
|
|
Poco::File(root).createDirectories();
|
|
|
|
// Don't const to allow for automatic move on return.
|
|
std::string newTmp = root + '/' + dirName;
|
|
if (::mkdir(newTmp.c_str(), S_IRWXU) < 0)
|
|
{
|
|
LOG_SYS("Failed to create temp directory [" << newTmp << ']');
|
|
return root;
|
|
}
|
|
return newTmp;
|
|
}
|
|
|
|
std::string getTempFileCopyPath(const std::string& srcDir, const std::string& srcFilename, const std::string& dstFilenamePrefix)
|
|
{
|
|
const std::string srcPath = srcDir + '/' + srcFilename;
|
|
const std::string dstFilename = dstFilenamePrefix + Util::encodeId(Util::rng::getNext()) + '_' + srcFilename;
|
|
#if HAVE_STD_FILESYSTEM
|
|
// Don't const to allow for automatic move on return.
|
|
std::string dstPath = filesystem::temp_directory_path() / dstFilename;
|
|
filesystem::copy(srcPath, dstPath);
|
|
|
|
static FileDeleter fileDeleter;
|
|
fileDeleter.registerForDeletion(dstPath);
|
|
#else
|
|
const std::string dstPath = Poco::Path(Poco::Path::temp(), dstFilename).toString();
|
|
copyFileTo(srcPath, dstPath);
|
|
Poco::TemporaryFile::registerForDeletion(dstPath);
|
|
#endif
|
|
|
|
return dstPath;
|
|
}
|
|
|
|
#if 1 // !HAVE_STD_FILESYSTEM
|
|
/// nftw() visitor that deletes what it is handed: directories reported after
/// their contents are removed with rmdir(), files and symbolic links with
/// unlink(). Any other entry type is left alone.
static int nftw_cb(const char *fpath, const struct stat*, int type, struct FTW*)
{
    switch (type)
    {
        case FTW_DP:
            rmdir(fpath);
            break;

        case FTW_F:
        case FTW_SL:
            unlink(fpath);
            break;

        default:
            break;
    }

    // Failures are deliberately ignored so the walk always continues.
    return 0;
}
|
|
#endif
|
|
|
|
void removeFile(const std::string& path, const bool recursive)
|
|
{
|
|
LOG_DBG("Removing [" << path << "] " << (recursive ? "recursively." : "only."));
|
|
|
|
// Amazingly filesystem::remove_all silently fails to work on some
|
|
// systems. No real need to be using experimental API here either.
|
|
#if 0 // HAVE_STD_FILESYSTEM
|
|
std::error_code ec;
|
|
if (recursive)
|
|
filesystem::remove_all(path, ec);
|
|
else
|
|
filesystem::remove(path, ec);
|
|
|
|
// Already removed or we don't care about failures.
|
|
(void) ec;
|
|
#else
|
|
try
|
|
{
|
|
struct stat sb;
|
|
errno = 0;
|
|
if (!recursive || stat(path.c_str(), &sb) == -1 || S_ISREG(sb.st_mode))
|
|
{
|
|
// Non-recursive directories and files that exist.
|
|
if (errno != ENOENT)
|
|
Poco::File(path).remove(recursive);
|
|
}
|
|
else
|
|
{
|
|
// Directories only.
|
|
nftw(path.c_str(), nftw_cb, 128, FTW_DEPTH | FTW_PHYS);
|
|
}
|
|
}
|
|
catch (const std::exception& e)
|
|
{
|
|
// Don't complain if already non-existant.
|
|
if (FileUtil::Stat(path).exists())
|
|
{
|
|
// Error only if it still exists.
|
|
LOG_ERR("Failed to remove ["
|
|
<< path << "] " << (recursive ? "recursively: " : "only: ") << e.what());
|
|
}
|
|
}
|
|
#endif
|
|
}
|
|
|
|
std::string realpath(const char* path)
|
|
{
|
|
char* resolved = ::realpath(path, nullptr);
|
|
if (resolved)
|
|
{
|
|
std::string real = resolved;
|
|
free(resolved);
|
|
return real;
|
|
}
|
|
|
|
LOG_SYS("Failed to get the realpath of [" << path << ']');
|
|
return path;
|
|
}
|
|
|
|
/// Tells whether the directory at @path has no entries besides "." and "..".
/// A directory that cannot be opened counts as empty, except when access is
/// denied (EACCES), in which case it is assumed to be non-empty.
bool isEmptyDirectory(const char* path)
{
    DIR* handle = opendir(path);
    if (!handle)
    {
        return errno != EACCES;
    }

    // Stop at the third entry: that is already one more than "." and "..".
    int entries = 0;
    while (readdir(handle) != nullptr)
    {
        ++entries;
        if (entries >= 3)
            break;
    }

    closedir(handle);
    return entries <= 2;
}
|
|
|
|
bool isWritable(const char* path)
|
|
{
|
|
if (access(path, W_OK) == 0)
|
|
return true;
|
|
|
|
LOG_INF("No write access to path [" << path << "]: " << strerror(errno));
|
|
return false;
|
|
}
|
|
|
|
bool updateTimestamps(const std::string& filename, timespec tsAccess, timespec tsModified)
|
|
{
|
|
// The timestamp is in seconds and microseconds.
|
|
timeval timestamps[2]
|
|
{
|
|
{
|
|
tsAccess.tv_sec,
|
|
#ifdef IOS
|
|
(__darwin_suseconds_t)
|
|
#endif
|
|
(tsAccess.tv_nsec / 1000)
|
|
},
|
|
{
|
|
tsModified.tv_sec,
|
|
#ifdef IOS
|
|
(__darwin_suseconds_t)
|
|
#endif
|
|
(tsModified.tv_nsec / 1000)
|
|
}
|
|
};
|
|
if (utimes(filename.c_str(), timestamps) != 0)
|
|
{
|
|
LOG_SYS("Failed to update the timestamp of [" << filename << ']');
|
|
return false;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
bool copyAtomic(const std::string& fromPath, const std::string& toPath, bool preserveTimestamps)
|
|
{
|
|
const std::string randFilename = toPath + Util::rng::getFilename(12);
|
|
if (copy(fromPath, randFilename, /*log=*/false, /*throw_on_error=*/false))
|
|
{
|
|
if (preserveTimestamps)
|
|
{
|
|
const Stat st(fromPath);
|
|
updateTimestamps(randFilename,
|
|
#ifdef IOS
|
|
st.sb().st_atimespec, st.sb().st_mtimespec
|
|
#else
|
|
st.sb().st_atim, st.sb().st_mtim
|
|
#endif
|
|
);
|
|
}
|
|
|
|
// Now rename atomically, replacing any existing files with the same name.
|
|
if (rename(randFilename.c_str(), toPath.c_str()) == 0)
|
|
return true;
|
|
|
|
LOG_SYS("Failed to copy [" << fromPath << "] -> [" << toPath
|
|
<< "] while atomically renaming:");
|
|
removeFile(randFilename, false); // Cleanup.
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Tells whether the two files hold exactly the same bytes.
/// Returns false when either file cannot be opened or when the sizes differ.
bool compareFileContents(const std::string& rhsPath, const std::string& lhsPath)
{
    // Open at the end so that tellg() yields the size straight away.
    const auto openMode = std::ifstream::binary | std::ifstream::ate;

    std::ifstream first(rhsPath, openMode);
    if (!first)
    {
        return false;
    }

    std::ifstream second(lhsPath, openMode);
    if (!second)
    {
        return false;
    }

    const auto firstSize = first.tellg();
    const auto secondSize = second.tellg();
    if (firstSize != secondSize)
    {
        return false;
    }

    first.seekg(0, std::ifstream::beg);
    second.seekg(0, std::ifstream::beg);

    using ByteIterator = std::istreambuf_iterator<char>;
    return std::equal(ByteIterator(first.rdbuf()), ByteIterator(), ByteIterator(second.rdbuf()));
}
|
|
|
|
/// Reads the whole file at @path into memory.
/// @param maxSize largest file size, in bytes, that will be read.
/// @return the file contents (empty for an empty file), or nullptr when the
///         file cannot be opened or stat'ed, is larger than @maxSize, or a
///         read fails. Should the file shrink while being read, only the
///         bytes actually read are returned.
std::unique_ptr<std::vector<char>> readFile(const std::string& path, int maxSize)
{
    const int fd = ::open(path.c_str(), O_RDONLY);
    if (fd < 0)
        return nullptr;

    struct stat st;
    if (::fstat(fd, &st) != 0 || st.st_size > maxSize)
    {
        ::close(fd);
        return nullptr;
    }

    auto data = std::make_unique<std::vector<char>>(st.st_size);
    off_t off = 0;
    // Never ask for more than the space left in the buffer, and never form a
    // reference past its end: the file may have grown since the fstat() above.
    while (off < st.st_size)
    {
        ssize_t n;
        while ((n = ::read(fd, data->data() + off, st.st_size - off)) < 0 && errno == EINTR)
        {
        }

        if (n < 0)
        {
            ::close(fd);
            return nullptr; // Error.
        }

        if (n == 0) // EOF before the expected size: the file shrank.
            break;

        off += n;
    }

    ::close(fd);

    // Drop the unread tail instead of returning zero padding as file content.
    data->resize(off);
    return data;
}
|
|
|
|
} // namespace FileUtil
|
|
|
|
namespace
{

/// A filesystem registered for disk-space checks: a path on it plus its device id.
struct fs
{
    fs(const std::string& path, dev_t dev)
        : _path(path)
        , _dev(dev)
    {
    }

    /// The path the filesystem was registered with.
    const std::string& getPath() const
    {
        return _path;
    }

    /// The device id the filesystem was registered with.
    dev_t getDev() const
    {
        return _dev;
    }

private:
    std::string _path;
    dev_t _dev;
};

/// Orders fs entries by device id only, so entries with the same device id
/// compare as equivalent regardless of their paths.
struct fsComparator
{
    bool operator() (const fs& lhs, const fs& rhs) const
    {
        const dev_t left = lhs.getDev();
        const dev_t right = rhs.getDev();
        return left < right;
    }
};

/// Mutex taken by the functions that read or modify 'filesystems'.
static std::mutex fsmutex;
/// The registered filesystems, at most one entry per device id.
static std::set<fs, fsComparator> filesystems;

} // anonymous namespace
|
|
|
|
namespace FileUtil
|
|
{
|
|
#if !MOBILEAPP
|
|
void registerFileSystemForDiskSpaceChecks(const std::string& path)
|
|
{
|
|
const std::string::size_type lastSlash = path.rfind('/');
|
|
assert(path.empty() || lastSlash != std::string::npos);
|
|
if (lastSlash != std::string::npos)
|
|
{
|
|
const std::string dirPath = path.substr(0, lastSlash + 1) + '.';
|
|
LOG_INF("Registering filesystem for space checks: [" << dirPath << ']');
|
|
|
|
std::lock_guard<std::mutex> lock(fsmutex);
|
|
|
|
struct stat s;
|
|
if (stat(dirPath.c_str(), &s) == 0)
|
|
{
|
|
filesystems.insert(fs(dirPath, s.st_dev));
|
|
}
|
|
}
|
|
}
|
|
|
|
std::string checkDiskSpaceOnRegisteredFileSystems(const bool cacheLastCheck)
|
|
{
|
|
static std::chrono::steady_clock::time_point lastCheck;
|
|
static std::string lastResult;
|
|
std::chrono::steady_clock::time_point now(std::chrono::steady_clock::now());
|
|
|
|
std::lock_guard<std::mutex> lock(fsmutex);
|
|
|
|
if (cacheLastCheck)
|
|
{
|
|
// Don't check more often than once a minute
|
|
if (std::chrono::duration_cast<std::chrono::seconds>(now - lastCheck).count() < 60)
|
|
return lastResult;
|
|
|
|
lastCheck = now;
|
|
}
|
|
|
|
for (const auto& i: filesystems)
|
|
{
|
|
if (!checkDiskSpace(i.getPath()))
|
|
{
|
|
if (cacheLastCheck)
|
|
lastResult = i.getPath();
|
|
return i.getPath();
|
|
}
|
|
}
|
|
|
|
if (cacheLastCheck)
|
|
lastResult = std::string();
|
|
return std::string();
|
|
}
|
|
#endif
|
|
|
|
bool checkDiskSpace(const std::string& path)
|
|
{
|
|
assert(!path.empty());
|
|
|
|
#if !MOBILEAPP
|
|
bool hookResult = true;
|
|
if (UnitBase::get().filterCheckDiskSpace(path, hookResult))
|
|
return hookResult;
|
|
#endif
|
|
|
|
// we should be able to run just OK with 5GB for production or 1GB for development
|
|
#if defined(__linux__) || defined(__FreeBSD__) || defined(IOS)
|
|
#if ENABLE_DEBUG
|
|
constexpr int64_t gb(1);
|
|
#else
|
|
constexpr int64_t gb(5);
|
|
#endif
|
|
constexpr int64_t ENOUGH_SPACE = gb*1024*1024*1024;
|
|
#endif
|
|
|
|
#if defined(__linux__) || defined(__FreeBSD__)
|
|
struct statfs sfs;
|
|
if (statfs(path.c_str(), &sfs) == -1)
|
|
return true;
|
|
|
|
const int64_t freeBytes = static_cast<int64_t>(sfs.f_bavail) * sfs.f_bsize;
|
|
|
|
LOG_INF("Filesystem [" << path << "] has " << (freeBytes / 1024 / 1024) <<
|
|
" MB free (" << (sfs.f_bavail * 100. / sfs.f_blocks) << "%).");
|
|
|
|
if (freeBytes > ENOUGH_SPACE)
|
|
return true;
|
|
|
|
if (static_cast<double>(sfs.f_bavail) / sfs.f_blocks <= 0.05)
|
|
return false;
|
|
#elif defined IOS
|
|
NSDictionary *atDict = [[NSFileManager defaultManager] attributesOfFileSystemForPath:@"/" error:NULL];
|
|
long long freeSpace = [[atDict objectForKey:NSFileSystemFreeSize] longLongValue];
|
|
long long totalSpace = [[atDict objectForKey:NSFileSystemSize] longLongValue];
|
|
|
|
if (freeSpace > ENOUGH_SPACE)
|
|
return true;
|
|
|
|
if (static_cast<double>(freeSpace) / totalSpace <= 0.05)
|
|
return false;
|
|
#endif
|
|
|
|
return true;
|
|
}
|
|
|
|
namespace
{
/// Whether anonymizeUrl() and anonymizeUsername() anonymize their input.
bool AnonymizeUserData{ false };

/// Salt handed to the anonymization helpers.
std::uint64_t AnonymizationSalt{ 82589933 };
} // anonymous namespace
|
|
|
|
void setUrlAnonymization(bool anonymize, const std::uint64_t salt)
|
|
{
|
|
AnonymizeUserData = anonymize;
|
|
AnonymizationSalt = salt;
|
|
}
|
|
|
|
/// Anonymize the basename of filenames, preserving the path and extension.
|
|
std::string anonymizeUrl(const std::string& url)
|
|
{
|
|
return AnonymizeUserData ? Util::anonymizeUrl(url, AnonymizationSalt) : url;
|
|
}
|
|
|
|
/// Anonymize user names and IDs.
|
|
/// Will use the Obfuscated User ID if one is provided via WOPI.
|
|
std::string anonymizeUsername(const std::string& username)
|
|
{
|
|
return AnonymizeUserData ? Util::anonymize(username, AnonymizationSalt) : username;
|
|
}
|
|
|
|
std::string extractFileExtension(const std::string& path)
|
|
{
|
|
return Util::splitLast(path, '.', true).second;
|
|
}
|
|
|
|
} // namespace FileUtil
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|