wsd: parallel load with cool.html

Support parallel CheckFileInfo upon serving
cool.html. This makes loading documents faster
from the user's perspective by parallelizing
CheckFileInfo and, later, DocBroker creation
and document downloading.

Change-Id: I838a19022517196ee2b459991a542ed9ef323e48
Signed-off-by: Ashod Nakashian <ashod.nakashian@collabora.co.uk>
This commit is contained in:
Ashod Nakashian 2024-01-16 06:26:21 -05:00 committed by Ashod Nakashian
parent 93b1b736d1
commit 84aa1a44e8
5 changed files with 228 additions and 14 deletions

View file

@ -54,6 +54,9 @@
#include <string>
std::map<std::string, std::string> ClientRequestDispatcher::StaticFileContentCache;
std::unordered_map<std::string, std::shared_ptr<RequestVettingStation>>
ClientRequestDispatcher::RequestVettingStations;
extern std::map<std::string, std::shared_ptr<DocumentBroker>> DocBrokers;
extern std::mutex DocBrokersMutex;
@ -559,6 +562,31 @@ void ClientRequestDispatcher::handleIncomingMessage(SocketDisposition& dispositi
FileServerRequestHandler::ResourceAccessDetails accessDetails;
COOLWSD::FileRequestHandler->handleRequest(request, requestDetails, message, socket,
accessDetails);
if (accessDetails.isValid())
{
LOG_ASSERT_MSG(requestDetails.getField(RequestDetails::Field::WOPISrc) ==
accessDetails.wopiSrc(),
"Expected identical WOPISrc in the request as in cool.html");
const std::string requestKey = RequestDetails::getRequestKey(
accessDetails.wopiSrc(), accessDetails.accessToken());
std::vector<std::string> options = {
"access_token=" + accessDetails.accessToken(), "access_token_ttl=0"
};
const RequestDetails fullRequestDetails =
RequestDetails(accessDetails.wopiSrc(), options, /*compat=*/std::string());
LOG_TRC("Creating RVS with key: " << requestKey << ", for DocumentLoadURI: "
<< fullRequestDetails.getDocumentURI());
auto it = RequestVettingStations.emplace(
requestKey, std::make_shared<RequestVettingStation>(
COOLWSD::getWebServerPoll(), fullRequestDetails));
it.first->second->handleRequest(_id);
}
socket->shutdown();
}
}
@ -1637,14 +1665,31 @@ void ClientRequestDispatcher::handleClientWsUpgrade(const Poco::Net::HTTPRequest
#endif
}
_rvs = std::make_shared<RequestVettingStation>(COOLWSD::getWebServerPoll(), requestDetails);
const std::string requestKey = requestDetails.getRequestKey();
if (!requestKey.empty())
{
auto it = RequestVettingStations.find(requestKey);
if (it != RequestVettingStations.end())
{
LOG_TRC("Found RVS under key: " << requestKey);
_rvs = it->second;
RequestVettingStations.erase(it);
}
}
if (!_rvs)
{
LOG_TRC("Creating RVS");
_rvs = std::make_shared<RequestVettingStation>(COOLWSD::getWebServerPoll(),
requestDetails);
}
// Indicate to the client that document broker is searching.
static constexpr const char* const status = "statusindicator: find";
LOG_TRC("Sending to Client [" << status << ']');
ws->sendMessage(status);
_rvs->handleRequest(_id, ws, socket, mobileAppDocId, disposition);
_rvs->handleRequest(_id, requestDetails, ws, socket, mobileAppDocId, disposition);
}
catch (const std::exception& exc)
{

View file

@ -119,8 +119,9 @@ private:
/// External requests are first vetted before allocating DocBroker and Kit process.
/// This is a map of the request URI to the RequestVettingStation for vetting.
std::unordered_map<std::string, std::shared_ptr<RequestVettingStation>> _requestVettingStations;
static std::unordered_map<std::string, std::shared_ptr<RequestVettingStation>>
RequestVettingStations;
/// Cache for static files, to avoid reading and processing from disk.
static std::map<std::string, std::string> StaticFileContentCache;
};
};

View file

@ -154,6 +154,36 @@ public:
/// Returns false if the WOPISrc is not encoded correctly.
static bool validateWOPISrc(const std::string& uri) { return !Util::needsURIEncoding(uri); }
/// This is a per-document, per-user request key.
/// If a user makes two requests on the same document at the same time,
/// they will have the same request-key and we won't differentiate between them.
static std::string getRequestKey(const std::string& wopiSrc, const std::string& accessToken)
{
const std::string decodedWopiSrc = Util::decodeURIComponent(wopiSrc);
const Poco::URI wopiSrcSanitized = RequestDetails::sanitizeURI(decodedWopiSrc);
std::string requestKey = RequestDetails::getDocKey(wopiSrcSanitized);
requestKey += '_';
requestKey += accessToken;
return requestKey;
}
/// This is a per-document, per-user request key.
std::string getRequestKey() const
{
const std::string wopiSrc = getField(RequestDetails::Field::WOPISrc);
if (!wopiSrc.empty())
{
std::string accessToken;
getParamByName("access_token", accessToken);
return getRequestKey(wopiSrc, accessToken);
}
return std::string();
}
// matches the WOPISrc if used. For load balancing
// must be 2nd element in the path after /cool/<here>
std::string getLegacyDocumentURI() const { return getField(Field::LegacyDocumentURI); }
@ -161,6 +191,12 @@ public:
/// The DocumentURI, decoded. Doesn't contain WOPISrc or any other appendages.
std::string getDocumentURI() const { return getField(Field::DocumentURI); }
/// Returns the document-specific key from the DocumentURI.
std::string getDocKey() const
{
return RequestDetails::getDocKey(RequestDetails::sanitizeURI(getDocumentURI()));
}
/// The DocumentURI, decoded and sanitized. Doesn't contain WOPISrc or any other appendages.
std::string getDocumentURISanitized() const
{

View file

@ -14,6 +14,7 @@
#include <RequestVettingStation.hpp>
#include <COOLWSD.hpp>
#include <RequestDetails.hpp>
#include <TraceEvent.hpp>
#if !MOBILEAPP
#include <StorageConnectionManager.hpp>
@ -49,12 +50,85 @@ void sendLoadResult(const std::shared_ptr<ClientSession>& clientSession, bool su
} // anonymous namespace
void RequestVettingStation::handleRequest(const std::string& id)
{
_id = id;
const std::string url = _requestDetails.getDocumentURI();
LOG_INF("URL [" << url << "] will be proactively vetted");
const auto uriPublic = RequestDetails::sanitizeURI(url);
const auto docKey = RequestDetails::getDocKey(uriPublic);
const std::string fileId = Util::getFilenameFromURL(docKey);
Util::mapAnonymized(fileId, fileId); // Identity mapping, since fileId is already obfuscated
LOG_INF("Starting GET request handler for session [" << _id << "] on url ["
<< COOLWSD::anonymizeUrl(url) << ']');
LOG_INF("Sanitized URI [" << COOLWSD::anonymizeUrl(url) << "] to ["
<< COOLWSD::anonymizeUrl(uriPublic.toString())
<< "] and mapped to docKey [" << docKey << "] for session [" << _id
<< ']');
// Check if readonly session is required
bool isReadOnly = false;
for (const auto& param : uriPublic.getQueryParameters())
{
LOG_TRC("Query param: " << param.first << ", value: " << param.second);
if (param.first == "permission" && param.second == "readonly")
{
isReadOnly = true;
}
}
LOG_INF("URL [" << COOLWSD::anonymizeUrl(url) << "] is "
<< (isReadOnly ? "readonly" : "writable"));
// Before we create DocBroker with a SocketPoll thread, a ClientSession, and a Kit process,
// we need to vet this request by invoking CheckFileInfo.
// For that, we need the storage settings to create a connection.
const StorageBase::StorageType storageType =
StorageBase::validate(uriPublic, /*takeOwnership=*/false);
switch (storageType)
{
case StorageBase::StorageType::Unsupported:
LOG_ERR("Unsupported URI [" << COOLWSD::anonymizeUrl(uriPublic.toString())
<< "] or no storage configured");
throw BadRequestException("No Storage configured or invalid URI " +
COOLWSD::anonymizeUrl(uriPublic.toString()) + ']');
break;
case StorageBase::StorageType::Unauthorized:
LOG_ERR("No authorized hosts found matching the target host [" << uriPublic.getHost()
<< "] in config");
sendErrorAndShutdown(_ws, "error: cmd=internal kind=unauthorized",
WebSocketHandler::StatusCodes::POLICY_VIOLATION);
break;
case StorageBase::StorageType::FileSystem:
LOG_INF("URI [" << COOLWSD::anonymizeUrl(uriPublic.toString()) << "] on docKey ["
<< docKey << "] is for a FileSystem document");
break;
#if !MOBILEAPP
case StorageBase::StorageType::Wopi:
LOG_INF("URI [" << COOLWSD::anonymizeUrl(uriPublic.toString()) << "] on docKey ["
<< docKey << "] is for a WOPI document");
// CheckFileInfo asynchronously.
checkFileInfo(url, uriPublic, docKey, isReadOnly, RedirectionLimit);
break;
#endif //!MOBILEAPP
}
}
void RequestVettingStation::handleRequest(const std::string& id,
const RequestDetails& requestDetails,
const std::shared_ptr<WebSocketHandler>& ws,
const std::shared_ptr<StreamSocket>& socket,
unsigned mobileAppDocId, SocketDisposition& disposition)
{
_id = id;
_requestDetails = requestDetails;
_ws = ws;
_socket = socket;
_mobileAppDocId = mobileAppDocId;
@ -143,7 +217,26 @@ void RequestVettingStation::handleRequest(const std::string& id,
<< docKey << ']');
// CheckFileInfo and only when it's good create DocBroker.
checkFileInfo(url, uriPublic, docKey, isReadOnly, RedirectionLimit);
if (_cfiState == CFIState::Active)
{
// Wait for CheckFileInfo result.
LOG_DBG("CheckFileInfo request is in progress. Will resume when done");
}
else if (_cfiState == CFIState::Pass && _wopiInfo)
{
// We have a valid CheckFileInfo result; Create the DocBroker.
createDocBroker(docKey, url, uriPublic, isReadOnly);
}
else if (_cfiState == CFIState::None)
{
// We don't have CheckFileInfo
checkFileInfo(url, uriPublic, docKey, isReadOnly, RedirectionLimit);
}
else
{
sendErrorAndShutdown(_ws, "error: cmd=internal kind=unauthorized",
WebSocketHandler::StatusCodes::POLICY_VIOLATION);
}
});
break;
#endif //!MOBILEAPP
@ -226,17 +319,15 @@ void RequestVettingStation::checkFileInfo(const std::string& url, const Poco::UR
if (failed)
{
_cfiState = CFIState::Fail;
if (httpResponse->statusLine().statusCode() == http::StatusCode::Forbidden)
{
LOG_ERR("Access denied to [" << uriAnonym << ']');
sendErrorAndShutdown(_ws, "error: cmd=storage kind=unauthorized",
WebSocketHandler::StatusCodes::POLICY_VIOLATION);
return;
}
LOG_ERR("Invalid URI or access denied to [" << uriAnonym << ']');
sendErrorAndShutdown(_ws, "error: cmd=storage kind=unauthorized",
WebSocketHandler::StatusCodes::POLICY_VIOLATION);
return;
}
@ -246,9 +337,17 @@ void RequestVettingStation::checkFileInfo(const std::string& url, const Poco::UR
LOG_DBG("WOPI::CheckFileInfo (" << callDurationMs << "): anonymizing...");
else
LOG_DBG("WOPI::CheckFileInfo (" << callDurationMs << "): " << wopiResponse);
_cfiState = CFIState::Pass;
if (_ws)
{
createDocBroker(docKey, url, uriPublic, isReadOnly);
}
}
else
{
_cfiState = CFIState::Fail;
if (COOLWSD::AnonymizeUserData)
wopiResponse = "obfuscated";
@ -261,14 +360,15 @@ void RequestVettingStation::checkFileInfo(const std::string& url, const Poco::UR
sendErrorAndShutdown(_ws, "error: cmd=storage kind=unauthorized",
WebSocketHandler::StatusCodes::POLICY_VIOLATION);
}
createDocBroker(docKey, url, uriPublic, isReadOnly);
};
_httpSession->setFinishedHandler(std::move(finishedCallback));
// Run the CheckFileInfo request on the WebServer Poll.
_httpSession->asyncRequest(httpRequest, *_poll);
if (_httpSession->asyncRequest(httpRequest, *_poll))
{
_cfiState = CFIState::Active;
}
}
#endif //!MOBILEAPP

View file

@ -13,24 +13,55 @@
#include "RequestDetails.hpp"
#include <Storage.hpp>
#include "StateEnum.hpp"
#include "WebSocketHandler.hpp"
#include <string>
/// RequestVettingStation is used to vet the request in the background.
/// Vetting for a WOPI request is performed through CheckFileInfo.
/// Once the request checks out, we can proceed to creating a
/// DocBroker and a Kit process.
/// There are two ways to use this class. One is to create it when
/// serving cool.html, the other when the WebSocket is created
/// (by upgrading the socket).
/// Unfortunately, when serving cool.html the connection is not the one
/// used for the WebSocket. As such, it cannot be used to create
/// DocBroker. Therefore, we work in two modes: we do the CheckFileInfo
/// as soon as we serve cool.html, but then we need to wait for the
/// WebSocket to create DocBroker.
/// A small complication is that CheckFileInfo might not be done by
/// then. Or, it might have timed out. Alternatively, the WebSocket
/// might never arrive (say, because the user clicked away).
/// We take these possibilities into account and support them here.
class RequestVettingStation
{
/// The CheckFileInfo State.
STATE_ENUM(CFIState, None, Active, Timedout, Fail, Pass);
public:
/// Create an instance with a SocketPoll and a RequestDetails instance.
RequestVettingStation(const std::shared_ptr<TerminatingPoll>& poll,
const RequestDetails& requestDetails)
: _poll(poll)
, _requestDetails(requestDetails)
, _cfiState(CFIState::None)
{
}
inline void logPrefix(std::ostream& os) const { os << '#' << _socket->getFD() << ": "; }
inline void logPrefix(std::ostream& os) const
{
if (_socket)
{
os << '#' << _socket->getFD() << ": ";
}
}
void handleRequest(const std::string& id, const std::shared_ptr<WebSocketHandler>& ws,
/// Called when cool.html is served, to start the vetting as early as possible.
void handleRequest(const std::string& id);
void handleRequest(const std::string& id, const RequestDetails& requestDetails,
const std::shared_ptr<WebSocketHandler>& ws,
const std::shared_ptr<StreamSocket>& socket, unsigned mobileAppDocId,
SocketDisposition& disposition);
@ -56,6 +87,7 @@ private:
std::shared_ptr<StreamSocket> _socket;
std::shared_ptr<http::Session> _httpSession;
unsigned _mobileAppDocId;
CFIState _cfiState;
Poco::JSON::Object::Ptr _wopiInfo;
LockContext _lockCtx;
};