From 84aa1a44e8c75b45c0fc86592af935d42470d580 Mon Sep 17 00:00:00 2001 From: Ashod Nakashian Date: Tue, 16 Jan 2024 06:26:21 -0500 Subject: [PATCH] wsd: parallel load with cool.html Support parallel CheckFileInfo upon serving cool.html. This makes loading documents faster from the user's perspective by parallelizing CheckFileInfo and, later, DocBroker creation and document downloading. Change-Id: I838a19022517196ee2b459991a542ed9ef323e48 Signed-off-by: Ashod Nakashian --- wsd/ClientRequestDispatcher.cpp | 49 +++++++++++++- wsd/ClientRequestDispatcher.hpp | 5 +- wsd/RequestDetails.hpp | 36 ++++++++++ wsd/RequestVettingStation.cpp | 116 +++++++++++++++++++++++++++++--- wsd/RequestVettingStation.hpp | 36 +++++++++- 5 files changed, 228 insertions(+), 14 deletions(-) diff --git a/wsd/ClientRequestDispatcher.cpp b/wsd/ClientRequestDispatcher.cpp index b7ba4fd88..9d3eb4717 100644 --- a/wsd/ClientRequestDispatcher.cpp +++ b/wsd/ClientRequestDispatcher.cpp @@ -54,6 +54,9 @@ #include std::map ClientRequestDispatcher::StaticFileContentCache; +std::unordered_map> + ClientRequestDispatcher::RequestVettingStations; + extern std::map> DocBrokers; extern std::mutex DocBrokersMutex; @@ -559,6 +562,31 @@ void ClientRequestDispatcher::handleIncomingMessage(SocketDisposition& dispositi FileServerRequestHandler::ResourceAccessDetails accessDetails; COOLWSD::FileRequestHandler->handleRequest(request, requestDetails, message, socket, accessDetails); + if (accessDetails.isValid()) + { + LOG_ASSERT_MSG(requestDetails.getField(RequestDetails::Field::WOPISrc) == + accessDetails.wopiSrc(), + "Expected identical WOPISrc in the request as in cool.html"); + + const std::string requestKey = RequestDetails::getRequestKey( + accessDetails.wopiSrc(), accessDetails.accessToken()); + + std::vector options = { + "access_token=" + accessDetails.accessToken(), "access_token_ttl=0" + }; + + const RequestDetails fullRequestDetails = + RequestDetails(accessDetails.wopiSrc(), options, /*compat=*/std::string()); + LOG_TRC("Creating RVS with key: " << requestKey << ", for DocumentLoadURI: " + << fullRequestDetails.getDocumentURI()); + + auto it = RequestVettingStations.emplace( + requestKey, std::make_shared( + COOLWSD::getWebServerPoll(), fullRequestDetails)); + + it.first->second->handleRequest(_id); + } + socket->shutdown(); } } @@ -1637,14 +1665,31 @@ void ClientRequestDispatcher::handleClientWsUpgrade(const Poco::Net::HTTPRequest #endif } - _rvs = std::make_shared(COOLWSD::getWebServerPoll(), requestDetails); + const std::string requestKey = requestDetails.getRequestKey(); + if (!requestKey.empty()) + { + auto it = RequestVettingStations.find(requestKey); + if (it != RequestVettingStations.end()) + { + LOG_TRC("Found RVS under key: " << requestKey); + _rvs = it->second; + RequestVettingStations.erase(it); + } + } + + if (!_rvs) + { + LOG_TRC("Creating RVS"); + _rvs = std::make_shared(COOLWSD::getWebServerPoll(), + requestDetails); + } // Indicate to the client that document broker is searching. static constexpr const char* const status = "statusindicator: find"; LOG_TRC("Sending to Client [" << status << ']'); ws->sendMessage(status); - _rvs->handleRequest(_id, ws, socket, mobileAppDocId, disposition); + _rvs->handleRequest(_id, requestDetails, ws, socket, mobileAppDocId, disposition); } catch (const std::exception& exc) { diff --git a/wsd/ClientRequestDispatcher.hpp b/wsd/ClientRequestDispatcher.hpp index d50a98dce..04d4d2aac 100644 --- a/wsd/ClientRequestDispatcher.hpp +++ b/wsd/ClientRequestDispatcher.hpp @@ -119,8 +119,9 @@ private: /// External requests are first vetted before allocating DocBroker and Kit process. /// This is a map of the request URI to the RequestVettingStation for vetting. - std::unordered_map> _requestVettingStations; + static std::unordered_map> + RequestVettingStations; /// Cache for static files, to avoid reading and processing from disk. static std::map StaticFileContentCache; -}; \ No newline at end of file +}; diff --git a/wsd/RequestDetails.hpp b/wsd/RequestDetails.hpp index 52cd42fe8..a7d523325 100644 --- a/wsd/RequestDetails.hpp +++ b/wsd/RequestDetails.hpp @@ -154,6 +154,36 @@ public: /// Returns false if the WOPISrc is not encoded correctly. static bool validateWOPISrc(const std::string& uri) { return !Util::needsURIEncoding(uri); } + /// This is a per-document, per-user request key. + /// If a user makes two requests on the same document at the same time, + /// they will have the same request-key and we won't differentiate between them. + static std::string getRequestKey(const std::string& wopiSrc, const std::string& accessToken) + { + const std::string decodedWopiSrc = Util::decodeURIComponent(wopiSrc); + const Poco::URI wopiSrcSanitized = RequestDetails::sanitizeURI(decodedWopiSrc); + + std::string requestKey = RequestDetails::getDocKey(wopiSrcSanitized); + requestKey += '_'; + requestKey += accessToken; + + return requestKey; + } + + /// This is a per-document, per-user request key. + std::string getRequestKey() const + { + const std::string wopiSrc = getField(RequestDetails::Field::WOPISrc); + if (!wopiSrc.empty()) + { + std::string accessToken; + getParamByName("access_token", accessToken); + + return getRequestKey(wopiSrc, accessToken); + } + + return std::string(); + } + // matches the WOPISrc if used. For load balancing // must be 2nd element in the path after /cool/ std::string getLegacyDocumentURI() const { return getField(Field::LegacyDocumentURI); } @@ -161,6 +191,12 @@ public: /// The DocumentURI, decoded. Doesn't contain WOPISrc or any other appendages. std::string getDocumentURI() const { return getField(Field::DocumentURI); } + /// Returns the document-specific key from the DocumentURI. + std::string getDocKey() const + { + return RequestDetails::getDocKey(RequestDetails::sanitizeURI(getDocumentURI())); + } + /// The DocumentURI, decoded and sanitized. Doesn't contain WOPISrc or any other appendages. std::string getDocumentURISanitized() const { diff --git a/wsd/RequestVettingStation.cpp b/wsd/RequestVettingStation.cpp index 8312eef4f..3feb99973 100644 --- a/wsd/RequestVettingStation.cpp +++ b/wsd/RequestVettingStation.cpp @@ -14,6 +14,7 @@ #include #include +#include #include #if !MOBILEAPP #include @@ -49,12 +50,85 @@ void sendLoadResult(const std::shared_ptr& clientSession, bool su } // anonymous namespace +void RequestVettingStation::handleRequest(const std::string& id) +{ + _id = id; + + const std::string url = _requestDetails.getDocumentURI(); + + LOG_INF("URL [" << url << "] will be proactively vetted"); + const auto uriPublic = RequestDetails::sanitizeURI(url); + const auto docKey = RequestDetails::getDocKey(uriPublic); + const std::string fileId = Util::getFilenameFromURL(docKey); + Util::mapAnonymized(fileId, fileId); // Identity mapping, since fileId is already obfuscated + + LOG_INF("Starting GET request handler for session [" << _id << "] on url [" + << COOLWSD::anonymizeUrl(url) << ']'); + + LOG_INF("Sanitized URI [" << COOLWSD::anonymizeUrl(url) << "] to [" + << COOLWSD::anonymizeUrl(uriPublic.toString()) + << "] and mapped to docKey [" << docKey << "] for session [" << _id + << ']'); + + // Check if readonly session is required + bool isReadOnly = false; + for (const auto& param : uriPublic.getQueryParameters()) + { + LOG_TRC("Query param: " << param.first << ", value: " << param.second); + if (param.first == "permission" && param.second == "readonly") + { + isReadOnly = true; + } + } + + LOG_INF("URL [" << COOLWSD::anonymizeUrl(url) << "] is " + << (isReadOnly ? "readonly" : "writable")); + + // Before we create DocBroker with a SocketPoll thread, a ClientSession, and a Kit process, + // we need to vet this request by invoking CheckFileInfo. + // For that, we need the storage settings to create a connection. + const StorageBase::StorageType storageType = + StorageBase::validate(uriPublic, /*takeOwnership=*/false); + switch (storageType) + { + case StorageBase::StorageType::Unsupported: + LOG_ERR("Unsupported URI [" << COOLWSD::anonymizeUrl(uriPublic.toString()) + << "] or no storage configured"); + throw BadRequestException("No Storage configured or invalid URI " + + COOLWSD::anonymizeUrl(uriPublic.toString()) + ']'); + + break; + case StorageBase::StorageType::Unauthorized: + LOG_ERR("No authorized hosts found matching the target host [" << uriPublic.getHost() + << "] in config"); + sendErrorAndShutdown(_ws, "error: cmd=internal kind=unauthorized", + WebSocketHandler::StatusCodes::POLICY_VIOLATION); + break; + + case StorageBase::StorageType::FileSystem: + LOG_INF("URI [" << COOLWSD::anonymizeUrl(uriPublic.toString()) << "] on docKey [" + << docKey << "] is for a FileSystem document"); + break; +#if !MOBILEAPP + case StorageBase::StorageType::Wopi: + LOG_INF("URI [" << COOLWSD::anonymizeUrl(uriPublic.toString()) << "] on docKey [" + << docKey << "] is for a WOPI document"); + + // CheckFileInfo asynchronously. + checkFileInfo(url, uriPublic, docKey, isReadOnly, RedirectionLimit); + break; +#endif //!MOBILEAPP + } +} + void RequestVettingStation::handleRequest(const std::string& id, + const RequestDetails& requestDetails, const std::shared_ptr& ws, const std::shared_ptr& socket, unsigned mobileAppDocId, SocketDisposition& disposition) { _id = id; + _requestDetails = requestDetails; _ws = ws; _socket = socket; _mobileAppDocId = mobileAppDocId; @@ -143,7 +217,26 @@ void RequestVettingStation::handleRequest(const std::string& id, << docKey << ']'); // CheckFileInfo and only when it's good create DocBroker. - checkFileInfo(url, uriPublic, docKey, isReadOnly, RedirectionLimit); + if (_cfiState == CFIState::Active) + { + // Wait for CheckFileInfo result. + LOG_DBG("CheckFileInfo request is in progress. Will resume when done"); + } + else if (_cfiState == CFIState::Pass && _wopiInfo) + { + // We have a valid CheckFileInfo result; Create the DocBroker. + createDocBroker(docKey, url, uriPublic, isReadOnly); + } + else if (_cfiState == CFIState::None) + { + // We don't have CheckFileInfo + checkFileInfo(url, uriPublic, docKey, isReadOnly, RedirectionLimit); + } + else + { + sendErrorAndShutdown(_ws, "error: cmd=internal kind=unauthorized", + WebSocketHandler::StatusCodes::POLICY_VIOLATION); + } }); break; #endif //!MOBILEAPP @@ -226,17 +319,15 @@ void RequestVettingStation::checkFileInfo(const std::string& url, const Poco::UR if (failed) { + _cfiState = CFIState::Fail; + if (httpResponse->statusLine().statusCode() == http::StatusCode::Forbidden) { LOG_ERR("Access denied to [" << uriAnonym << ']'); - sendErrorAndShutdown(_ws, "error: cmd=storage kind=unauthorized", - WebSocketHandler::StatusCodes::POLICY_VIOLATION); return; } LOG_ERR("Invalid URI or access denied to [" << uriAnonym << ']'); - sendErrorAndShutdown(_ws, "error: cmd=storage kind=unauthorized", - WebSocketHandler::StatusCodes::POLICY_VIOLATION); return; } @@ -246,9 +337,17 @@ void RequestVettingStation::checkFileInfo(const std::string& url, const Poco::UR LOG_DBG("WOPI::CheckFileInfo (" << callDurationMs << "): anonymizing..."); else LOG_DBG("WOPI::CheckFileInfo (" << callDurationMs << "): " << wopiResponse); + + _cfiState = CFIState::Pass; + if (_ws) + { + createDocBroker(docKey, url, uriPublic, isReadOnly); + } } else { + _cfiState = CFIState::Fail; + if (COOLWSD::AnonymizeUserData) wopiResponse = "obfuscated"; @@ -261,14 +360,15 @@ void RequestVettingStation::checkFileInfo(const std::string& url, const Poco::UR sendErrorAndShutdown(_ws, "error: cmd=storage kind=unauthorized", WebSocketHandler::StatusCodes::POLICY_VIOLATION); } - - createDocBroker(docKey, url, uriPublic, isReadOnly); }; _httpSession->setFinishedHandler(std::move(finishedCallback)); // Run the CheckFileInfo request on the WebServer Poll. - _httpSession->asyncRequest(httpRequest, *_poll); + if (_httpSession->asyncRequest(httpRequest, *_poll)) + { + _cfiState = CFIState::Active; + } } #endif //!MOBILEAPP diff --git a/wsd/RequestVettingStation.hpp b/wsd/RequestVettingStation.hpp index db2ca66d9..6d72f29e8 100644 --- a/wsd/RequestVettingStation.hpp +++ b/wsd/RequestVettingStation.hpp @@ -13,24 +13,55 @@ #include "RequestDetails.hpp" #include +#include "StateEnum.hpp" #include "WebSocketHandler.hpp" #include +/// RequestVettingStation is used to vet the request in the background. +/// Vetting for a WOPI request is performed through CheckFileInfo. +/// Once the request checks out, we can proceed to creating a +/// DocBroker and a Kit process. +/// There are two ways to use this class. One is to create it when +/// serving cool.html, the other when the WebSocket is created +/// (by upgrading the socket). +/// Unfortunately, when serving cool.html the connection is not the one +/// used for the WebSocket. As such, it cannot be used to create +/// DocBroker. Therefore, we work in two modes: we do the CheckFileInfo +/// as soon as we serve cool.html, but then we need to wait for the +/// WebSocket to create DocBroker. +/// A small complication is that CheckFileInfo might not be done by +/// then. Or, it might have timed out. Alternatively, the WebSocket +/// might never arrive (say, because the user clicked away). +/// We take these possibilities into account and support them here. class RequestVettingStation { + /// The CheckFileInfo State. + STATE_ENUM(CFIState, None, Active, Timedout, Fail, Pass); + public: /// Create an instance with a SocketPoll and a RequestDetails instance. RequestVettingStation(const std::shared_ptr& poll, const RequestDetails& requestDetails) : _poll(poll) , _requestDetails(requestDetails) + , _cfiState(CFIState::None) { } - inline void logPrefix(std::ostream& os) const { os << '#' << _socket->getFD() << ": "; } + inline void logPrefix(std::ostream& os) const + { + if (_socket) + { + os << '#' << _socket->getFD() << ": "; + } + } - void handleRequest(const std::string& id, const std::shared_ptr& ws, + /// Called when cool.html is served, to start the vetting as early as possible. + void handleRequest(const std::string& id); + + void handleRequest(const std::string& id, const RequestDetails& requestDetails, + const std::shared_ptr& ws, const std::shared_ptr& socket, unsigned mobileAppDocId, SocketDisposition& disposition); @@ -56,6 +87,7 @@ private: std::shared_ptr _socket; std::shared_ptr _httpSession; unsigned _mobileAppDocId; + CFIState _cfiState; Poco::JSON::Object::Ptr _wopiInfo; LockContext _lockCtx; };