libreoffice-online/common/StringVector.hpp

316 lines
9.6 KiB
C++
Raw Normal View History

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#pragma once
#include <cstdint>
#include <cstring>
#include <string>
#include <string_view>
#include <utility>
#include <vector>
/**
 * Stores an offset and a length into the single underlying string of StringVector.
 */
struct StringToken
{
    /// Offset of the first character of the token in the underlying string.
    std::size_t _index = 0;
    /// Number of characters that belong to the token.
    std::size_t _length = 0;

    /// Creates an empty token at offset 0 (members are never left indeterminate).
    StringToken() = default;

    /// Creates a token covering 'length' characters starting at 'index'.
    StringToken(std::size_t index, std::size_t length)
        : _index(index)
        , _length(length)
    {
    }
};

/**
 * A tokenized string: one underlying std::string plus a list of (offset, length)
 * tokens that point into it. Reading a token by an index past the end of the token
 * list gives you an empty string instead of an out-of-bounds access.
 */
class StringVector
{
    /// All tokens are substrings of this string.
    std::string _string;
    /// Offsets/lengths into _string, in token order.
    std::vector<StringToken> _tokens;

public:
    /// Creates a vector with no tokens and an empty underlying string.
    explicit StringVector() = default;

    /// Takes ownership of the underlying string and of the tokens that index into it.
    explicit StringVector(std::string string, std::vector<StringToken> tokens)
        : _string(std::move(string))
        , _tokens(std::move(tokens))
    {
    }

    /// Tokenize delimited values until we hit new-line or the end.
    /// Appends one StringToken (offset relative to 'data') per non-empty token to 'tokens';
    /// runs of consecutive delimiters never produce empty tokens.
    /// Does nothing when 'data' is null, 'size' is 0 or the first byte is NUL.
    static void tokenize(const char* data, const std::size_t size, const char delimiter,
                         std::vector<StringToken>& tokens)
    {
        if (size == 0 || data == nullptr || *data == '\0')
            return;

        tokens.reserve(16);

        // [start, end) is the candidate token; 'end' always trails the scan position.
        const char* start = data;
        const char* end = data;
        for (std::size_t i = 0; i < size && data[i] != '\n'; ++i, ++end)
        {
            if (data[i] == delimiter)
            {
                // A delimiter closes the current token, if there is a non-empty one.
                if (start != end && *start != delimiter)
                    tokens.emplace_back(start - data, end - start);

                // Park 'start' on the delimiter; it is stepped past it below.
                start = end;
            }
            else if (*start == delimiter)
                ++start; // First non-delimiter after a delimiter: the token begins here.
        }

        // Flush the trailing token that was not closed by a delimiter.
        if (start != end && *start != delimiter && *start != '\n')
            tokens.emplace_back(start - data, end - start);
    }

    /// Calls func(index, token) on each token until func returns true or we run out of tokens.
    /// 'index' is the zero-based position of the token and 'token' is a std::string_view into
    /// 'data'. Scanning follows the same rules as the single-character tokenize(): it stops at
    /// new-line or after 'size' bytes and skips empty tokens.
    template <class UnaryFunction>
    static void tokenize_foreach(UnaryFunction& func, const char* data, const std::size_t size,
                                 const char delimiter = ' ')
    {
        if (size == 0 || data == nullptr || *data == '\0')
            return;

        std::size_t index = 0;
        const char* start = data;
        const char* end = data;
        for (std::size_t i = 0; i < size && data[i] != '\n'; ++i, ++end)
        {
            if (data[i] == delimiter)
            {
                if (start != end && *start != delimiter)
                {
                    // The callback asks us to stop by returning true.
                    if (func(index++, std::string_view(start, end - start)))
                        return;
                }

                start = end;
            }
            else if (*start == delimiter)
                ++start;
        }

        // Trailing token; the return value no longer matters as we are done anyway.
        if (start != end && *start != delimiter && *start != '\n')
            func(index, std::string_view(start, end - start));
    }

    /// Tokenize single-char delimited values until we hit new-line or the end.
    /// The whole buffer of 'size' bytes is copied into the result, including anything
    /// that follows the new-line.
    static StringVector tokenize(const char* data, const std::size_t size,
                                 const char delimiter = ' ')
    {
        if (size == 0 || data == nullptr || *data == '\0')
            return StringVector();

        std::vector<StringToken> tokens;
        tokenize(data, size, delimiter, tokens);
        return StringVector(std::string(data, size), std::move(tokens));
    }

    /// Tokenize single-char delimited values until we hit new-line or the end.
    static StringVector tokenize(const std::string& s, const char delimiter = ' ')
    {
        if (s.empty())
            return StringVector();

        std::vector<StringToken> tokens;
        tokenize(s.data(), s.size(), delimiter, tokens);
        return StringVector(s, std::move(tokens));
    }

    /// Tokenize by the delimiter string.
    /// 'len' is the number of characters of 'delimiter' to use; a negative value means
    /// "use std::strlen(delimiter)". Unlike the single-character overloads, empty tokens
    /// are kept and new-lines are not special. Returns an empty vector when 's' is empty,
    /// 'len' is 0, or 'delimiter' is null or starts with NUL.
    static StringVector tokenize(const std::string& s, const char* delimiter, int len = -1)
    {
        if (s.empty() || len == 0 || delimiter == nullptr || *delimiter == '\0')
            return StringVector();

        const std::size_t delimiterLength
            = len < 0 ? std::strlen(delimiter) : static_cast<std::size_t>(len);

        std::vector<StringToken> tokens;
        tokens.reserve(16);

        std::size_t start = 0;
        for (;;)
        {
            // Search for exactly delimiterLength characters, so that the search and the
            // advance below always agree on how long the delimiter is.
            const std::size_t end = s.find(delimiter, start, delimiterLength);
            if (end == std::string::npos)
            {
                // Last token: everything that is left, stored with its real length.
                tokens.emplace_back(start, s.size() - start);
                break;
            }

            tokens.emplace_back(start, end - start);
            start = end + delimiterLength; // Never exceeds s.size(): the match lies inside s.
        }

        return StringVector(s, std::move(tokens));
    }

    /// Tokenize by a delimiter given as a character array; the last array element
    /// (the null terminator of a string literal) is not part of the delimiter.
    template <std::size_t N>
    static StringVector tokenize(const std::string& s, const char (&delimiter)[N])
    {
        return tokenize(s, delimiter, N - 1);
    }

    /// Tokenize by the delimiter string.
    static StringVector tokenize(const std::string& s, const std::string& delimiter)
    {
        return tokenize(s, delimiter.data(), delimiter.size());
    }

    /** Tokenize based on any of the characters in 'delimiters'.

        I.e. when there is '\n\r' in there, any of them means a delimiter.
        In addition, trim the values so there are no leading or trailing spaces.
    */
    static StringVector tokenizeAnyOf(const std::string& s, const char* delimiters,
                                      const std::size_t delimitersLength);

    /// Same as above, for a character array of delimiters.
    template <std::size_t N>
    static StringVector tokenizeAnyOf(const std::string& s, const char (&delimiters)[N])
    {
        return tokenizeAnyOf(s, delimiters, N - 1); // Exclude the null terminator.
    }

    /// Same as above, for a null-terminated string of delimiters.
    static StringVector tokenizeAnyOf(const std::string& s, const char* delimiters)
    {
        return tokenizeAnyOf(s, delimiters, std::strlen(delimiters));
    }

    /// Unlike std::vector, gives an empty string if index is unexpected.
    std::string operator[](std::size_t index) const
    {
        if (index >= _tokens.size())
        {
            return std::string();
        }

        const StringToken& token = _tokens[index];
        return _string.substr(token._index, token._length);
    }

    /// Number of tokens.
    std::size_t size() const { return _tokens.size(); }

    /// True if there are no tokens (the underlying string may still be non-empty).
    bool empty() const { return _tokens.empty(); }

    // Iteration is over the StringToken entries; use getParam() to get the text of one.
    std::vector<StringToken>::const_iterator begin() const { return _tokens.begin(); }

    std::vector<StringToken>::iterator begin() { return _tokens.begin(); }

    std::vector<StringToken>::const_iterator end() const { return _tokens.end(); }

    std::vector<StringToken>::iterator end() { return _tokens.end(); }

    /// Removes a token from the token list; the underlying string is left untouched.
    std::vector<StringToken>::iterator erase(std::vector<StringToken>::const_iterator it)
    {
        return _tokens.erase(it);
    }

    /// Appends 'string' to the underlying string (no separator is inserted) and
    /// registers it as a new last token.
    void push_back(const std::string& string)
    {
        _tokens.emplace_back(_string.size(), string.size());
        _string += string;
    }

    /// Gets the underlying string of a single token.
    std::string getParam(const StringToken& token) const
    {
        return _string.substr(token._index, token._length);
    }

    /// Concats the tokens starting from the token at 'offset', using separator as separator.
    /// Returns an empty string when 'offset' is past the last token.
    template <typename T> inline std::string cat(const T& separator, std::size_t offset) const
    {
        std::string ret;
        if (offset >= _tokens.size())
        {
            return ret;
        }

        ret.reserve(_string.size() * 2);

        // Append straight from the underlying string: assigning a temporary to 'ret'
        // would throw away the capacity reserved above.
        auto it = _tokens.begin() + offset;
        ret.append(_string, it->_index, it->_length);
        for (++it; it != _tokens.end(); ++it)
        {
            // Avoid temporary strings, append separately.
            ret += separator;
            ret.append(_string, it->_index, it->_length);
        }

        return ret;
    }

    /// Compares the nth token with string; false if there is no nth token.
    template <typename T> bool equals(std::size_t index, const T& string) const
    {
        if (index >= _tokens.size())
        {
            return false;
        }

        const StringToken& token = _tokens[index];
        return _string.compare(token._index, token._length, string) == 0;
    }

    /// Compares the nth token with the first N - 1 characters of a character array
    /// (i.e. a string literal without its null terminator).
    template <std::size_t N> bool equals(std::size_t index, const char (&string)[N]) const
    {
        if (index >= _tokens.size())
        {
            return false;
        }

        const StringToken& token = _tokens[index];
        return _string.compare(token._index, token._length, string, N - 1) == 0;
    }

    /// Checks if the token text at index starts with the given string;
    /// false if there is no token at index.
    template <std::size_t N>
    bool startsWith(std::size_t index, const char (&string)[N]) const
    {
        if (index >= _tokens.size())
        {
            return false;
        }

        const StringToken& token = _tokens[index];
        constexpr auto len = N - 1; // we don't want to compare the '\0'
        return token._length >= len && _string.compare(token._index, len, string) == 0;
    }

    /// Checks if the token text starts with the given string;
    /// false if the token begins at or past the end of the underlying string.
    template <std::size_t N>
    bool startsWith(const StringToken& token, const char (&string)[N]) const
    {
        if (token._index >= _string.size())
        {
            return false;
        }

        constexpr auto len = N - 1; // we don't want to compare the '\0'
        return token._length >= len && _string.compare(token._index, len, string) == 0;
    }

    /// Compares the nth token with the mth token from another StringVector.
    bool equals(std::size_t index, const StringVector& other, std::size_t otherIndex);

    // The two members below are only declared here; their definitions live out of line.
    bool getUInt32(std::size_t index, const std::string& key, uint32_t& value) const;

    bool getNameIntegerPair(std::size_t index, std::string& name, int& value) const;
};
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */