// File: libreoffice-online/common/FileUtil.cpp (C++, 544 lines, 16 KiB)

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
* Copyright the Collabora Online contributors.
*
* SPDX-License-Identifier: MPL-2.0
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <config.h>
#include "FileUtil.hpp"
// [blame-view residue] commit "wsd: faster jail setup via bind-mount" (Change-Id: Iae3fda5e876cf47d2cae6669a87b5b826a8748df), 2020-04-09 08:02:58 -05:00
#include <dirent.h>
#include <exception>
#include <ftw.h>
#include <stdexcept>
#include <sys/time.h>
#ifdef __linux__
#include <sys/vfs.h>
#elif defined IOS
// [blame-view residue] 2018-08-29 10:23:22 -05:00
#import <Foundation/Foundation.h>
#elif defined __FreeBSD__
#include <sys/param.h>
#include <sys/mount.h>
// [blame-view residue] 2018-08-29 10:23:22 -05:00
#endif
#include <fcntl.h>
#include <chrono>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <filesystem>
#include <fstream>
#include <mutex>
#include <string>
// [blame-view residue] commit "wsd: faster jail setup via bind-mount" (Change-Id: Iae3fda5e876cf47d2cae6669a87b5b826a8748df), 2020-04-09 08:02:58 -05:00
#include <Poco/File.h>
#include <Poco/Path.h>
#include "Log.hpp"
#include "Util.hpp"
// [blame-view residue] 2017-03-31 11:18:41 -05:00
#include "Unit.hpp"
namespace FileUtil
{
/// Creates a directory with a random 64-character name under @p path.
/// Returns only the generated name (not the full path).
/// std::filesystem::create_directory may throw if the directory cannot be created.
std::string createRandomDir(const std::string& path)
{
    std::string dirName = Util::rng::getFilename(64);
    const std::string fullPath = path + '/' + dirName;
    std::filesystem::create_directory(fullPath);
    return dirName;
}
/// Copies the contents of the file @p fromPath into @p toPath, creating or
/// truncating the destination with the source's mode bits.
///
/// @param fromPath        source file to read.
/// @param toPath          destination file to create/truncate and write.
/// @param log             when true, logs the copy at INF level.
/// @param throw_on_error  when true, failures are rethrown as std::runtime_error;
///                        otherwise they are logged and false is returned.
/// @return true on success, false on failure (when not throwing).
///
/// On failure a partially written destination is removed. The destination is
/// only removed if this call actually opened (and therefore truncated) it, so
/// a failure to open the source or destination never deletes an existing file.
bool copy(const std::string& fromPath, const std::string& toPath, bool log, bool throw_on_error)
{
    int from = -1, to = -1;
    try
    {
        from = open(fromPath.c_str(), O_RDONLY);
        if (from < 0)
            throw std::runtime_error("Failed to open src " + anonymizeUrl(fromPath));

        struct stat st;
        if (fstat(from, &st) != 0)
            throw std::runtime_error("Failed to fstat src " + anonymizeUrl(fromPath));

        to = open(toPath.c_str(), O_CREAT | O_TRUNC | O_WRONLY, st.st_mode);
        if (to < 0)
            throw std::runtime_error("Failed to open dest " + anonymizeUrl(toPath));

        // Logging may be redundant and/or noisy.
        if (log)
            LOG_INF("Copying " << st.st_size << " bytes from " << anonymizeUrl(fromPath)
                               << " to " << anonymizeUrl(toPath));

        char buffer[64 * 1024];
        ssize_t n;
        off_t bytesIn = 0;
        do
        {
            // Retry reads interrupted by a signal.
            while ((n = ::read(from, buffer, sizeof(buffer))) < 0 && errno == EINTR)
                LOG_TRC("EINTR reading from " << anonymizeUrl(fromPath));
            if (n < 0)
                throw std::runtime_error("Failed to read from " + anonymizeUrl(fromPath)
                                         + " at " + std::to_string(bytesIn) + " bytes in");
            bytesIn += n;
            if (n == 0) // EOF
                break;
            assert (off_t(sizeof (buffer)) >= n);

            // Handle short writes and EINTR
            for (ssize_t j = 0; j < n;)
            {
                ssize_t written;
                while ((written = ::write(to, buffer + j, n - j)) < 0 && errno == EINTR)
                    LOG_TRC("EINTR writing to " << anonymizeUrl(toPath));
                if (written < 0)
                {
                    throw std::runtime_error("Failed to write " + std::to_string(n)
                                             + " bytes to " + anonymizeUrl(toPath) + " at "
                                             + std::to_string(bytesIn) + " bytes into "
                                             + anonymizeUrl(fromPath));
                }
                j += written;
            }
        } while (true);

        if (bytesIn != st.st_size)
        {
            LOG_WRN("Unusual: file " << anonymizeUrl(fromPath) << " changed size "
                    "during copy from " << st.st_size << " to " << bytesIn);
        }

        close(from);
        close(to);
        return true;
    }
    catch (const std::exception& ex)
    {
        std::ostringstream oss;
        oss << "Error while copying from " << anonymizeUrl(fromPath) << " to "
            << anonymizeUrl(toPath) << ": " << ex.what();
        const std::string err = oss.str();
        LOG_ERR(err);

        // Only close descriptors that were actually opened.
        if (from >= 0)
            close(from);
        if (to >= 0)
        {
            close(to);
            // Remove the partial output; skipped when the destination was never
            // opened so that an untouched, pre-existing file is left alone.
            unlink(toPath.c_str());
        }

        if (throw_on_error)
            throw std::runtime_error(err);
    }

    return false;
}
/// Returns the system temporary directory.
///
/// Asks std::filesystem first, using the non-throwing overload so that a
/// failure there (e.g. the configured temp path is not an existing directory)
/// reaches the environment-variable fallback instead of throwing.
/// The fallback tries TMPDIR, TEMP, TMP and finally "/tmp".
std::string getSysTempDirectoryPath()
{
    std::error_code ec;
    // Don't const to allow for automatic move on return.
    std::string path = std::filesystem::temp_directory_path(ec).string();
    if (!ec && !path.empty())
        return path;

    // Sensible fallback, though shouldn't be needed.
    const char *tmp = getenv("TMPDIR");
    if (!tmp)
        tmp = getenv("TEMP");
    if (!tmp)
        tmp = getenv("TMP");
    if (!tmp)
        tmp = "/tmp";
    return tmp;
}
/// Creates a private (owner-only) directory named "cool-" plus 16 random
/// characters under @p root, or under the system temp directory when
/// @p root is empty. @p root itself is created first if needed.
/// Returns the new directory's path, or @p root if mkdir fails.
std::string createRandomTmpDir(std::string root)
{
    if (root.empty())
    {
        root = getSysTempDirectoryPath();
    }

    Poco::File(root).createDirectories();

    // Left non-const so it can be moved on return.
    std::string candidate = root + "/cool-" + Util::rng::getFilename(16);
    if (::mkdir(candidate.c_str(), S_IRWXU) >= 0)
    {
        return candidate;
    }

    LOG_SYS("Failed to create random temp directory [" << candidate << ']');
    return root;
}
/// Creates a private (owner-only) directory called @p dirName under @p root,
/// or under the system temp directory when @p root is empty.
/// @p root itself is created first if needed.
/// Returns the new directory's path, or @p root if mkdir fails.
std::string createTmpDir(std::string dirName, std::string root)
{
    if (root.empty())
    {
        root = getSysTempDirectoryPath();
    }

    Poco::File(root).createDirectories();

    // Left non-const so it can be moved on return.
    std::string target = root + '/' + dirName;
    if (::mkdir(target.c_str(), S_IRWXU) >= 0)
    {
        return target;
    }

    LOG_SYS("Failed to create temp directory [" << target << ']');
    return root;
}
#if 1 // !HAVE_STD_FILESYSTEM
/// nftw() visitor used for recursive removal: removes post-order directories
/// with rmdir and regular files / symlinks with unlink. Other entry types
/// are left alone. Failures are ignored and the walk always continues.
static int nftw_cb(const char *fpath, const struct stat*, int type, struct FTW*)
{
    switch (type)
    {
        case FTW_DP:
            rmdir(fpath);
            break;
        case FTW_F:
        case FTW_SL:
            unlink(fpath);
            break;
        default:
            break;
    }

    // Returning zero keeps nftw going even when a removal failed.
    return 0;
}
#endif
void removeFile(const std::string& path, const bool recursive)
{
wsd: faster jail setup via bind-mount loolmount now works and supports mounting and unmounting, plus numerous improvements, refactoring, logging, etc.. When enabled, binding improves the jail setup time by anywhere from 2x to orders of magnitude (in docker, f.e.). A new config entry mount_jail_tree controls whether mounting is used or the old method of linking/copying of jail contents. It is set to true by default and falls back to linking/copying. A test mount is done when the setting is enabled, and if mounting fails, it's disabled to avoid noise. Temporarily disabled for unit-tests until we can cleanup lingering mounts after Jenkins aborts our build job. In a future patch we will have mount/jail cleanup as part of make. The network/system files in /etc that need frequent refreshing are now updated in systemplate to make their most recent version available in the jails. These files can change during the course of loolwsd lifetime, and are unlikely to be updated in systemplate after installation at all. We link to them in the systemplate/etc directory, and if that fails, we copy them before forking each kit instance to have the latest. This reworks the approach used to bind-mount the jails and the templates such that the total is now down to only three mounts: systemplate, lo, tmp. As now systemplate and lotemplate are shared, they must be mounted as readonly, this means that user/ must now be moved into tmp/user/ which is writable. The mount-points must be recursive, because we mount lo/ within the mount-point of systemplate (which is the root of the jail). But because we (re)bind recursively, and because both systemplate and lotemplate are mounted for each jails, we need to make them unbindable, so they wouldn't multiply the mount-points for each jails (an explosive growth!) Contrarywise, we don't want the mount-points to be shared, because we don't expect to add/remove mounts after a jail is created. 
The random temp directory is now created and set correctly, plus many logging and other improvements. Change-Id: Iae3fda5e876cf47d2cae6669a87b5b826a8748df Reviewed-on: https://gerrit.libreoffice.org/c/online/+/92829 Tested-by: Jenkins Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Ashod Nakashian <ashnakash@gmail.com>
2020-04-09 08:02:58 -05:00
LOG_DBG("Removing [" << path << "] " << (recursive ? "recursively." : "only."));
// Amazingly filesystem::remove_all silently fails to work on some
// systems. No real need to be using experimental API here either.
#if 0 // HAVE_STD_FILESYSTEM
std::error_code ec;
if (recursive)
std::filesystem::remove_all(path, ec);
else
std::filesystem::remove(path, ec);
// Already removed or we don't care about failures.
(void) ec;
#else
try
{
struct stat sb;
wsd: faster jail setup via bind-mount loolmount now works and supports mounting and unmounting, plus numerous improvements, refactoring, logging, etc.. When enabled, binding improves the jail setup time by anywhere from 2x to orders of magnitude (in docker, f.e.). A new config entry mount_jail_tree controls whether mounting is used or the old method of linking/copying of jail contents. It is set to true by default and falls back to linking/copying. A test mount is done when the setting is enabled, and if mounting fails, it's disabled to avoid noise. Temporarily disabled for unit-tests until we can cleanup lingering mounts after Jenkins aborts our build job. In a future patch we will have mount/jail cleanup as part of make. The network/system files in /etc that need frequent refreshing are now updated in systemplate to make their most recent version available in the jails. These files can change during the course of loolwsd lifetime, and are unlikely to be updated in systemplate after installation at all. We link to them in the systemplate/etc directory, and if that fails, we copy them before forking each kit instance to have the latest. This reworks the approach used to bind-mount the jails and the templates such that the total is now down to only three mounts: systemplate, lo, tmp. As now systemplate and lotemplate are shared, they must be mounted as readonly, this means that user/ must now be moved into tmp/user/ which is writable. The mount-points must be recursive, because we mount lo/ within the mount-point of systemplate (which is the root of the jail). But because we (re)bind recursively, and because both systemplate and lotemplate are mounted for each jails, we need to make them unbindable, so they wouldn't multiply the mount-points for each jails (an explosive growth!) Contrarywise, we don't want the mount-points to be shared, because we don't expect to add/remove mounts after a jail is created. 
The random temp directory is now created and set correctly, plus many logging and other improvements. Change-Id: Iae3fda5e876cf47d2cae6669a87b5b826a8748df Reviewed-on: https://gerrit.libreoffice.org/c/online/+/92829 Tested-by: Jenkins Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Ashod Nakashian <ashnakash@gmail.com>
2020-04-09 08:02:58 -05:00
errno = 0;
if (!recursive || stat(path.c_str(), &sb) == -1 || S_ISREG(sb.st_mode))
{
wsd: faster jail setup via bind-mount loolmount now works and supports mounting and unmounting, plus numerous improvements, refactoring, logging, etc.. When enabled, binding improves the jail setup time by anywhere from 2x to orders of magnitude (in docker, f.e.). A new config entry mount_jail_tree controls whether mounting is used or the old method of linking/copying of jail contents. It is set to true by default and falls back to linking/copying. A test mount is done when the setting is enabled, and if mounting fails, it's disabled to avoid noise. Temporarily disabled for unit-tests until we can cleanup lingering mounts after Jenkins aborts our build job. In a future patch we will have mount/jail cleanup as part of make. The network/system files in /etc that need frequent refreshing are now updated in systemplate to make their most recent version available in the jails. These files can change during the course of loolwsd lifetime, and are unlikely to be updated in systemplate after installation at all. We link to them in the systemplate/etc directory, and if that fails, we copy them before forking each kit instance to have the latest. This reworks the approach used to bind-mount the jails and the templates such that the total is now down to only three mounts: systemplate, lo, tmp. As now systemplate and lotemplate are shared, they must be mounted as readonly, this means that user/ must now be moved into tmp/user/ which is writable. The mount-points must be recursive, because we mount lo/ within the mount-point of systemplate (which is the root of the jail). But because we (re)bind recursively, and because both systemplate and lotemplate are mounted for each jails, we need to make them unbindable, so they wouldn't multiply the mount-points for each jails (an explosive growth!) Contrarywise, we don't want the mount-points to be shared, because we don't expect to add/remove mounts after a jail is created. 
The random temp directory is now created and set correctly, plus many logging and other improvements. Change-Id: Iae3fda5e876cf47d2cae6669a87b5b826a8748df Reviewed-on: https://gerrit.libreoffice.org/c/online/+/92829 Tested-by: Jenkins Tested-by: Jenkins CollaboraOffice <jenkinscollaboraoffice@gmail.com> Reviewed-by: Ashod Nakashian <ashnakash@gmail.com>
2020-04-09 08:02:58 -05:00
// Non-recursive directories and files that exist.
if (errno != ENOENT)
Poco::File(path).remove(recursive);
}
else
{
// Directories only.
nftw(path.c_str(), nftw_cb, 128, FTW_DEPTH | FTW_PHYS);
}
}
catch (const std::exception& e)
{
// Don't complain if already non-existant.
if (FileUtil::Stat(path).exists())
{
// Error only if it still exists.
LOG_ERR("Failed to remove ["
<< path << "] " << (recursive ? "recursively: " : "only: ") << e.what());
}
}
#endif
}
std::string realpath(const char* path)
{
char* resolved = ::realpath(path, nullptr);
if (resolved)
{
std::string real = resolved;
free(resolved);
return real;
}
LOG_SYS("Failed to get the realpath of [" << path << ']');
return path;
}
/// Returns true when the directory at @p path has no entries besides "." and "..".
/// If the directory cannot be opened, it is reported as empty unless the
/// failure was EACCES, in which case it is assumed to be non-empty.
bool isEmptyDirectory(const char* path)
{
    DIR* handle = opendir(path);
    if (!handle)
    {
        // Assume it's not empty when access is denied.
        return errno != EACCES;
    }

    // Three entries are enough to know there is more than "." and "..".
    int entries = 0;
    while (entries < 3 && readdir(handle) != nullptr)
    {
        ++entries;
    }

    closedir(handle);
    return entries <= 2;
}
/// Returns true if access() grants write permission on @p path.
/// Otherwise logs the reason (from errno) at INF level and returns false.
bool isWritable(const char* path)
{
    const bool writable = (access(path, W_OK) == 0);
    if (!writable)
    {
        LOG_INF("No write access to path [" << path << "]: " << strerror(errno));
    }

    return writable;
}
/// Sets the access and modification times of @p filename via utimes().
/// utimes() works in seconds and microseconds, so the nanosecond parts of
/// @p tsAccess and @p tsModified are divided by 1000.
/// Returns true on success; logs and returns false on failure.
bool updateTimestamps(const std::string& filename, timespec tsAccess, timespec tsModified)
{
    // Casting to the field's own type covers platforms (e.g. iOS) where
    // tv_usec is narrower than tv_nsec.
    using MicroSeconds = decltype(timeval::tv_usec);

    timeval stamps[2];
    stamps[0].tv_sec = tsAccess.tv_sec;
    stamps[0].tv_usec = static_cast<MicroSeconds>(tsAccess.tv_nsec / 1000);
    stamps[1].tv_sec = tsModified.tv_sec;
    stamps[1].tv_usec = static_cast<MicroSeconds>(tsModified.tv_nsec / 1000);

    if (utimes(filename.c_str(), stamps) == 0)
    {
        return true;
    }

    LOG_SYS("Failed to update the timestamp of [" << filename << ']');
    return false;
}
bool copyAtomic(const std::string& fromPath, const std::string& toPath, bool preserveTimestamps)
{
const std::string randFilename = toPath + Util::rng::getFilename(12);
if (copy(fromPath, randFilename, /*log=*/false, /*throw_on_error=*/false))
{
if (preserveTimestamps)
{
const Stat st(fromPath);
updateTimestamps(randFilename,
#ifdef IOS
st.sb().st_atimespec, st.sb().st_mtimespec
#else
st.sb().st_atim, st.sb().st_mtim
#endif
);
}
// Now rename atomically, replacing any existing files with the same name.
if (rename(randFilename.c_str(), toPath.c_str()) == 0)
return true;
LOG_SYS("Failed to copy [" << fromPath << "] -> [" << toPath
<< "] while atomically renaming:");
removeFile(randFilename, false); // Cleanup.
}
return false;
}
/// Returns true when both files can be opened, have the same size and
/// identical bytes. Returns false if either file fails to open.
bool compareFileContents(const std::string& rhsPath, const std::string& lhsPath)
{
    // Open at the end so tellg() immediately yields the file size.
    const auto openMode = std::ifstream::binary | std::ifstream::ate;

    std::ifstream first(rhsPath, openMode);
    if (!first)
    {
        return false;
    }

    std::ifstream second(lhsPath, openMode);
    if (!second)
    {
        return false;
    }

    const bool sameSize = (first.tellg() == second.tellg());
    if (!sameSize)
    {
        return false;
    }

    // Rewind both streams and compare byte by byte.
    first.seekg(0, std::ifstream::beg);
    second.seekg(0, std::ifstream::beg);

    const std::istreambuf_iterator<char> firstBegin(first.rdbuf());
    const std::istreambuf_iterator<char> streamEnd;
    const std::istreambuf_iterator<char> secondBegin(second.rdbuf());
    return std::equal(firstBegin, streamEnd, secondBegin);
}
/// Convenience overload: allocates a buffer of @p maxSize elements, fills it
/// through the readFile(path, buffer, maxSize) overload and returns it.
/// Returns nullptr when that overload reports a negative result.
std::unique_ptr<std::vector<char>> readFile(const std::string& path, int maxSize)
{
    auto buffer = std::make_unique<std::vector<char>>(maxSize);
    if (readFile(path, *buffer, maxSize) < 0)
    {
        return nullptr;
    }

    return buffer;
}
} // namespace FileUtil
namespace
{

/// A filesystem registered for disk-space checks: a path on it plus its device id.
struct fs
{
    fs(const std::string& path, dev_t dev)
        : _path(path)
        , _dev(dev)
    {
    }

    const std::string& getPath() const
    {
        return _path;
    }

    dev_t getDev() const
    {
        return _dev;
    }

private:
    std::string _path;
    dev_t _dev;
};

/// Orders fs entries by device id only, so a std::set using it keeps at most
/// one entry per device regardless of path.
struct fsComparator
{
    bool operator() (const fs& lhs, const fs& rhs) const
    {
        return rhs.getDev() > lhs.getDev();
    }
};

/// Guards access to `filesystems`.
static std::mutex fsmutex;

/// The registered filesystems, unique per device.
static std::set<fs, fsComparator> filesystems;

} // anonymous namespace
namespace FileUtil
{
#if !MOBILEAPP
void registerFileSystemForDiskSpaceChecks(const std::string& path)
{
const std::string::size_type lastSlash = path.rfind('/');
assert(path.empty() || lastSlash != std::string::npos);
if (lastSlash != std::string::npos)
{
const std::string dirPath = path.substr(0, lastSlash + 1) + '.';
LOG_INF("Registering filesystem for space checks: [" << dirPath << ']');
std::lock_guard<std::mutex> lock(fsmutex);
struct stat s;
if (stat(dirPath.c_str(), &s) == 0)
{
filesystems.insert(fs(dirPath, s.st_dev));
}
}
}
std::string checkDiskSpaceOnRegisteredFileSystems(const bool cacheLastCheck)
{
static std::chrono::steady_clock::time_point lastCheck;
static std::string lastResult;
std::chrono::steady_clock::time_point now(std::chrono::steady_clock::now());
std::lock_guard<std::mutex> lock(fsmutex);
if (cacheLastCheck)
{
// Don't check more often than once a minute
if (std::chrono::duration_cast<std::chrono::seconds>(now - lastCheck).count() < 60)
return lastResult;
lastCheck = now;
}
for (const auto& i: filesystems)
{
if (!checkDiskSpace(i.getPath()))
{
if (cacheLastCheck)
lastResult = i.getPath();
return i.getPath();
}
}
if (cacheLastCheck)
lastResult = std::string();
return std::string();
}
#endif
bool checkDiskSpace(const std::string& path)
{
assert(!path.empty());
if (!Util::isMobileApp())
{
bool hookResult = true;
if (UnitBase::get().filterCheckDiskSpace(path, hookResult))
return hookResult;
}
2017-03-31 11:18:41 -05:00
// we should be able to run just OK with 5GB for production or 1GB for development
#if defined(__linux__) || defined(__FreeBSD__) || defined(IOS)
#if ENABLE_DEBUG
constexpr int64_t gb(1);
#else
constexpr int64_t gb(5);
#endif
constexpr int64_t ENOUGH_SPACE = gb*1024*1024*1024;
#endif
2018-08-29 10:23:22 -05:00
#if defined(__linux__) || defined(__FreeBSD__)
struct statfs sfs;
if (statfs(path.c_str(), &sfs) == -1)
return true;
const int64_t freeBytes = static_cast<int64_t>(sfs.f_bavail) * sfs.f_bsize;
LOG_INF("Filesystem [" << path << "] has " << (freeBytes / 1024 / 1024) <<
" MB free (" << (sfs.f_bavail * 100. / sfs.f_blocks) << "%).");
if (freeBytes > ENOUGH_SPACE)
return true;
if (static_cast<double>(sfs.f_bavail) / sfs.f_blocks <= 0.05)
return false;
#elif defined IOS
2018-08-29 10:23:22 -05:00
NSDictionary *atDict = [[NSFileManager defaultManager] attributesOfFileSystemForPath:@"/" error:NULL];
long long freeSpace = [[atDict objectForKey:NSFileSystemFreeSize] longLongValue];
long long totalSpace = [[atDict objectForKey:NSFileSystemSize] longLongValue];
if (freeSpace > ENOUGH_SPACE)
return true;
if (static_cast<double>(freeSpace) / totalSpace <= 0.05)
return false;
#endif
return true;
}
namespace
{
/// Whether anonymizeUrl()/anonymizeUsername() anonymize their input. Off by default.
bool AnonymizeUserData{ false };

/// Salt passed to the anonymization helpers; replaced by setUrlAnonymization().
std::uint64_t AnonymizationSalt{ 82589933 };
} // anonymous namespace
/// Configures anonymization for anonymizeUrl() and anonymizeUsername().
/// @param anonymize  enables or disables anonymization.
/// @param salt       salt forwarded to the anonymization helpers.
void setUrlAnonymization(bool anonymize, const std::uint64_t salt)
{
    // The two settings are independent; store the salt, then the switch.
    AnonymizationSalt = salt;
    AnonymizeUserData = anonymize;
}
/// Anonymize the basename of filenames, preserving the path and extension.
/// Returns @p url unchanged when anonymization is disabled.
std::string anonymizeUrl(const std::string& url)
{
    if (!AnonymizeUserData)
    {
        return url;
    }

    return Util::anonymizeUrl(url, AnonymizationSalt);
}
/// Anonymize user names and IDs.
/// Will use the Obfuscated User ID if one is provided via WOPI.
/// Returns @p username unchanged when anonymization is disabled.
std::string anonymizeUsername(const std::string& username)
{
    if (!AnonymizeUserData)
    {
        return username;
    }

    return Util::anonymize(username, AnonymizationSalt);
}
/// Returns the second half of Util::splitLast(path, '.', true), i.e. the
/// part of @p path associated with its last '.'.
std::string extractFileExtension(const std::string& path)
{
    const auto parts = Util::splitLast(path, '.', true);
    return parts.second;
}
} // namespace FileUtil
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */