24317d555f
Given a well-formed utf-8 string 'message' of messageLen bytes and a desire to truncate to approximately abbrevLen bytes return the shortest string greater or equal to abbrevLen that does not split a utf-8 sequence. Signed-off-by: Caolán McNamara <caolan.mcnamara@collabora.com> Change-Id: Ie623d8c1027bb4724485cab5b0bb6d3d1cd3d9ab
350 lines
12 KiB
C++
350 lines
12 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
|
|
/*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*/
|
|
|
|
#pragma once
|
|
|
|
#include <cerrno>
#include <climits>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <iomanip>
#include <map>
#include <regex>
#include <sstream>
#include <string>
#include <string_view>
#include <vector>
|
|
|
|
#include <StringVector.hpp>
|
|
#include <Util.hpp>
|
|
|
|
#define LOK_USE_UNSTABLE_API
|
|
#include <LibreOfficeKit/LibreOfficeKitEnums.h>
|
|
|
|
namespace COOLProtocol
|
|
{
|
|
// Version of the wire protocol implemented here.
// See protocol.txt.
constexpr unsigned ProtocolMajorVersionNumber = 0;
constexpr unsigned ProtocolMinorVersionNumber = 1;

/// Renders the protocol version as "<major>.<minor>".
inline std::string GetProtocolVersion()
{
    std::string version = std::to_string(ProtocolMajorVersionNumber);
    version += '.';
    version += std::to_string(ProtocolMinorVersionNumber);
    return version;
}
|
|
|
|
// Parse a string into a version tuple.
|
|
// Negative numbers for error.
|
|
std::tuple<int, int, std::string> ParseVersion(const std::string& version);
|
|
|
|
inline bool stringToInteger(const std::string& input, int& value)
|
|
{
|
|
bool res;
|
|
std::tie(value, res) = Util::i32FromString(input);
|
|
return res;
|
|
}
|
|
|
|
inline bool stringToUInt32(const std::string& input, uint32_t& value)
|
|
{
|
|
bool res;
|
|
std::tie(value, res) = Util::i32FromString(input);
|
|
return res;
|
|
}
|
|
|
|
inline bool stringToUInt64(const std::string& input, uint64_t& value)
|
|
{
|
|
bool res;
|
|
std::tie(value, res) = Util::u64FromString(input);
|
|
return res;
|
|
}
|
|
|
|
/// Splits 'token' at the first occurrence of 'delim' into a name and a value.
///
/// Both halves are computed before either output is written, so the call is
/// safe even when 'name' or 'value' refers to the same string as 'token'.
///
/// @param token text of the form "<name><delim><value>".
/// @param name  receives everything before the first delimiter (may be empty).
/// @param value receives everything after the first delimiter (may be empty,
///              may itself contain further delimiters).
/// @param delim separator character, '=' by default.
/// @return true if a delimiter was found; otherwise false and the outputs are untouched.
inline
bool parseNameValuePair(const std::string& token, std::string& name, std::string& value, const char delim = '=')
{
    const size_t mid = token.find(delim);
    if (mid == std::string::npos)
        return false;

    // Extract both parts first: writing 'name' may modify 'token' if they alias.
    std::string left = token.substr(0, mid);
    std::string right = token.substr(mid + 1);
    name.swap(left);
    value.swap(right);
    return true;
}
|
|
|
|
bool getTokenInteger(const std::string& token, const std::string_view name, int& value);
|
|
bool getTokenUInt32(const std::string& token, const std::string_view name, uint32_t& value);
|
|
bool getTokenUInt64(const std::string& token, const std::string_view name, uint64_t& value);
|
|
bool getTokenString(const std::string& token, const std::string_view name, std::string& value);
|
|
bool getTokenKeyword(const std::string& token, const std::string_view name, const std::map<std::string, int>& map, int& value);
|
|
|
|
bool getTokenKeyword(const StringVector& tokens, const std::string_view name, const std::map<std::string, int>& map, int& value);
|
|
|
|
bool getTokenInteger(const StringVector& tokens, const std::string_view name, int& value);
|
|
|
|
/// Literal-string token names.
///
/// Matches a token of the form "<name>=<integer>" where 'name' is a string
/// literal, and parses the decimal integer that follows the '='.
///
/// @param token the full token text.
/// @param name  the expected name, as a string literal (N counts its terminator).
/// @param value receives the parsed number on success; set to 0 when the name
///              matches but no digits follow; untouched otherwise.
/// @return true if the name matched and a number representable as int was parsed.
///         Numbers that do not fit in an int are rejected rather than silently
///         narrowed.
template <std::size_t N>
inline bool getTokenInteger(const std::string& token, const char (&name)[N], int& value)
{
    // N includes null termination.
    static_assert(N > 1, "Token name must be at least one character long.");

    // The token must hold "<name>=" followed by at least one more character.
    if (token.size() <= N || token[N - 1] != '=' || token.compare(0, N - 1, name) != 0)
        return false;

    const char* str = token.data() + N;
    char* endptr = nullptr;
    errno = 0; // strtol only ever sets errno, so clear it to detect ERANGE reliably.
    const long parsed = std::strtol(str, &endptr, 10);
    if (endptr == str)
    {
        // Nothing was converted; strtol yields 0 in that case.
        value = 0;
        return false;
    }

    if (errno == ERANGE || parsed < INT_MIN || parsed > INT_MAX)
        return false;

    value = static_cast<int>(parsed);
    return true;
}
|
|
|
|
/// Parses "<name>=<integer>" from token like getTokenInteger does, and additionally
/// requires the parsed value to be zero or positive.
/// @return true only if parsing succeeded and value >= 0.
template <std::size_t N>
inline bool getNonNegTokenInteger(const std::string& token, const char (&name)[N], int& value)
{
    if (!getTokenInteger(token, name, value))
        return false;

    return value >= 0;
}
|
|
|
|
inline bool getTokenString(const StringVector& tokens,
|
|
const std::string_view name,
|
|
std::string& value)
|
|
{
|
|
for (const auto& token : tokens)
|
|
{
|
|
if (getTokenString(tokens.getParam(token), name, value))
|
|
{
|
|
return true;
|
|
}
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
bool getTokenStringFromMessage(const std::string& message, const std::string_view name, std::string& value);
|
|
bool getTokenKeywordFromMessage(const std::string& message, const std::string_view name, const std::map<std::string, int>& map, int& value);
|
|
|
|
/// Splits the first line of a buffer on 'delimiter' and converts each non-empty
/// piece to an int.
///
/// Only the bytes in [data, data + size) up to (not including) the first '\n'
/// are considered. Consecutive, leading and trailing delimiters produce no
/// entries. Each piece is converted with std::atoi, so a piece without leading
/// digits yields 0. The conversion never reads beyond 'size', so the buffer
/// does not have to be null-terminated.
///
/// @param data      start of the buffer; nullptr yields an empty result.
/// @param size      number of valid bytes in 'data'.
/// @param delimiter separator character, ',' by default.
/// @return the parsed integers, in order of appearance.
inline
std::vector<int> tokenizeInts(const char* data, const size_t size, const char delimiter = ',')
{
    std::vector<int> tokens;
    if (size == 0 || data == nullptr)
        return tokens;

    const char* cursor = data;
    const char* const limit = data + size;
    while (cursor != limit && *cursor != '\n')
    {
        if (*cursor == delimiter)
        {
            ++cursor;
            continue;
        }

        // Collect one maximal run of non-delimiter characters.
        const char* const first = cursor;
        while (cursor != limit && *cursor != '\n' && *cursor != delimiter)
            ++cursor;

        // Parse a bounded, terminated copy: atoi on the raw buffer could run past 'size'.
        tokens.emplace_back(std::atoi(std::string(first, cursor).c_str()));
    }

    return tokens;
}
|
|
|
|
inline
|
|
std::vector<int> tokenizeInts(const std::string& s, const char delimiter = ',')
|
|
{
|
|
return tokenizeInts(s.data(), s.size(), delimiter);
|
|
}
|
|
|
|
inline bool getTokenIntegerFromMessage(const std::string& message, const std::string_view name, int& value)
|
|
{
|
|
return getTokenInteger(StringVector::tokenize(message), name, value);
|
|
}
|
|
|
|
/// Returns the first token of a message.
|
|
inline
|
|
std::string getFirstToken(const char *message, const int length, const char delim = ' ')
|
|
{
|
|
return Util::getDelimitedInitialSubstring(message, length, delim);
|
|
}
|
|
|
|
/// Returns the first token of any message object that exposes data() and size(),
/// by forwarding to the pointer-based getFirstToken.
template <typename T>
std::string getFirstToken(const T& message, const char delim = ' ')
{
    const auto* const bytes = message.data();
    return getFirstToken(bytes, message.size(), delim);
}
|
|
|
|
/// Returns true if 'message' begins with 'prefix'.
/// An empty prefix matches every message.
inline
bool matchPrefix(const std::string_view prefix, const std::string_view message)
{
    const size_t prefixLen = prefix.size();
    if (prefixLen > message.size())
        return false;

    return message.substr(0, prefixLen) == prefix;
}
|
|
|
|
/// Returns true if the bytes in 'message' begin with 'prefix'.
/// An empty prefix matches every message.
inline
bool matchPrefix(const std::string_view prefix, const std::vector<char>& message)
{
    const size_t prefixLen = prefix.size();
    if (message.size() < prefixLen)
        return false;

    // View exactly the leading prefixLen bytes of the message and compare.
    const std::string_view head(message.data(), prefixLen);
    return head == prefix;
}
|
|
|
|
inline
|
|
bool matchPrefix(const std::string_view prefix, const std::string_view message, const bool ignoreWhitespace)
|
|
{
|
|
if (ignoreWhitespace)
|
|
{
|
|
const size_t posPre = prefix.find_first_not_of(' ');
|
|
const size_t posMsg = message.find_first_not_of(' ');
|
|
|
|
return matchPrefix(posPre == std::string::npos ? prefix : prefix.substr(posPre),
|
|
posMsg == std::string::npos ? message : message.substr(posMsg));
|
|
}
|
|
else
|
|
{
|
|
return matchPrefix(prefix, message);
|
|
}
|
|
}
|
|
|
|
/// Returns true if the token is a user-interaction token.
/// Commands that are sent automatically are excluded. A true result only
/// means the user interacted with the UI, not that anything was edited.
inline
bool tokenIndicatesUserInteraction(const std::string_view token)
{
    // Tokens containing any of these substrings (e.g. statusindicator) are excluded.

    // FIXME: This is wrong. That the token happens to contain (or not) a certain substring is
    // no guarantee that it "indicates user interaction". It might be like that at the moment,
    // but that is coincidental. We should check what the actual whole token is, at least, not
    // look for a substring.
    constexpr std::string_view automaticMarkers[] = { "tile", "status", "state" };
    for (const std::string_view marker : automaticMarkers)
    {
        if (token.find(marker) != std::string_view::npos)
            return false;
    }

    return token != "userinactive";
}
|
|
|
|
/// Returns true if the token is a likely document modifying command.
|
|
/// This is never 100% accurate, but it is needed to filter out tokens
|
|
/// that certainly do not modify the document, such as 'load' and 'save'
|
|
/// commands. Some commands are certainly modifying, e.g. 'key', others
|
|
/// can only potentially be modifying, e.g. 'mouse' while dragging.
|
|
/// Note: this is only used when we don't have the modified flag from
|
|
/// Core so we flag the document as user-modified more accurately.
|
|
inline bool tokenIndicatesDocumentModification(const StringVector& tokens)
|
|
{
|
|
// These keywords are chosen to cover the largest set of
|
|
// commands that may potentially modify the document.
|
|
// We need to assume modification rather than not.
|
|
if (tokens.equals(0, "key") || tokens.equals(0, "outlinestate") ||
|
|
tokens.equals(0, "paste") || tokens.equals(0, "insertfile") ||
|
|
tokens.equals(0, "textinput") || tokens.equals(0, "windowkey") ||
|
|
tokens.equals(0, "windowmouse") || tokens.equals(0, "windowgesture"))
|
|
{
|
|
return true;
|
|
}
|
|
|
|
if (tokens.size() > 1 && tokens.equals(0, "uno"))
|
|
{
|
|
// By default, all uno commands are modifying, unless we are certain they don't.
|
|
return !tokens.equals(1, ".uno:SidebarHide") && !tokens.equals(1, ".uno:SidebarShow") &&
|
|
!tokens.equals(1, ".uno:Copy") && !tokens.equals(1, ".uno:Save");
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/// Returns the first line of a message.
|
|
inline
|
|
std::string getFirstLine(const char *message, const int length)
|
|
{
|
|
return Util::getDelimitedInitialSubstring(message, length, '\n');
|
|
}
|
|
|
|
/// Returns the first line of any message object that exposes data() and size(),
/// by forwarding to the pointer-based getFirstLine.
template <typename T>
std::string getFirstLine(const T& message)
{
    const auto* const bytes = message.data();
    return getFirstLine(bytes, message.size());
}
|
|
|
|
/// Maximum number of leading bytes that getAbbreviatedMessage scans when
/// looking for the end of the first line.
constexpr int maxNonAbbreviatedMsgLen = 500;
|
|
|
|
/// Decides whether an abbreviation of 'message' needs a trailing ellipsis.
/// @param message the full message.
/// @param length  total number of bytes in 'message'.
/// @param spanLen number of bytes that the abbreviation keeps.
/// @return false when nothing is dropped, or when the only dropped byte is a
///         final newline; true otherwise.
inline bool shouldEllipse(const char* message, const size_t length, const size_t spanLen)
{
    // The whole message is kept: nothing to indicate.
    if (spanLen == length)
        return false;

    // More than one byte is dropped, or the single dropped byte is not a newline.
    const size_t lastIndex = length - 1;
    return spanLen < lastIndex || message[lastIndex] != '\n';
}
|
|
|
|
/// Given a well-formed utf-8 string 'message' of messageLen bytes and a
/// desire to truncate to approximately abbrevLen bytes return the shortest
/// string greater or equal to abbrevLen that does not split a utf-8
/// sequence.
///
/// @param message    the utf-8 bytes; nullptr yields an empty string.
/// @param messageLen number of valid bytes in 'message'.
/// @param abbrevLen  requested length; values beyond messageLen are clamped so
///                   the buffer is never read past its end.
/// @return the leading bytes of 'message', extended past abbrevLen only as far
///         as needed to include trailing continuation bytes.
inline std::string truncateUtf8(const char* message, size_t messageLen, size_t abbrevLen)
{
    if (message == nullptr)
        return std::string();

    size_t cut = abbrevLen < messageLen ? abbrevLen : messageLen;

    // Continuation bytes have the bit pattern 10xxxxxx; keep absorbing them so the
    // cut lands on a sequence boundary. Usually the loop does not iterate at all.
    while (cut < messageLen && (static_cast<uint8_t>(message[cut]) & 0xC0) == 0x80)
        ++cut;

    return std::string(message, cut);
}
|
|
|
|
/// Returns an abbreviation of the message (the first line, indicating truncation). We assume
|
|
/// that it adhers to the COOL protocol, i.e. that there is always a first (or only) line that
|
|
/// is in printable UTF-8. I.e. no encoding of binary bytes is done. The format of the result is
|
|
/// not guaranteed to be stable. It is to be used for logging purposes only, not for decoding
|
|
/// protocol frames.
|
|
inline
|
|
std::string getAbbreviatedMessage(const char *message, const int length)
|
|
{
|
|
if (message == nullptr || length <= 0)
|
|
{
|
|
return std::string();
|
|
}
|
|
|
|
const size_t spanLen = Util::getDelimiterPosition(message,
|
|
std::min(length, maxNonAbbreviatedMsgLen), '\n');
|
|
|
|
// If first line is less than the length (minus newline), add ellipsis.
|
|
if (shouldEllipse(message, length, spanLen))
|
|
return truncateUtf8(message, length, spanLen) + "...";
|
|
|
|
return std::string(message, spanLen);
|
|
}
|
|
|
|
inline std::string getAbbreviatedMessage(const std::string& message)
|
|
{
|
|
const size_t spanLen = Util::getDelimiterPosition(message.data(),
|
|
std::min<size_t>(message.size(), maxNonAbbreviatedMsgLen), '\n');
|
|
|
|
// If first line is less than the length (minus newline), add ellipsis.
|
|
if (shouldEllipse(message.data(), message.size(), spanLen))
|
|
return truncateUtf8(message.data(), message.size(), spanLen) + "...";
|
|
|
|
return message.substr(0, spanLen);
|
|
}
|
|
|
|
/// Abbreviates any message object that exposes data() and size(), by forwarding
/// to the pointer-based getAbbreviatedMessage.
template <typename T>
std::string getAbbreviatedMessage(const T& message)
{
    const auto* const bytes = message.data();
    return getAbbreviatedMessage(bytes, message.size());
}
|
|
};
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|