libreoffice-online/net/HttpRequest.cpp

618 lines
19 KiB
C++
Raw Normal View History

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4; fill-column: 100 -*- */
/*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*/
#include <config.h>
#include "HttpRequest.hpp"
#include <Poco/MemoryStream.h>
#include <Poco/Net/HTTPResponse.h>
#include <chrono>
#include <cstdint>
#include <fstream>
#include <memory>
#include <sstream>
#include <string>
#include <sys/types.h>
#include <netdb.h>
#include "Common.hpp"
#include <utility>
#include "Log.hpp"
#include "Util.hpp"
namespace http
{
/// Tells whether the given character counts as (non-line-feed) whitespace
/// for the purposes of this parser: space, horizontal tab, or carriage return.
/// FIXME: Technically, we should skip: SP, HTAB, VT (%x0B),
/// FF (%x0C), or bare CR.
static inline bool isWhitespace(const char ch)
{
    switch (ch)
    {
        case ' ':
        case '\t':
        case '\r':
            return true;
        default:
            return false;
    }
}
/// Advances past any run of characters accepted by isWhitespace()
/// (space, tab and carriage return) beginning at off.
/// Returns the offset of the first character that is not such
/// whitespace, or len when the rest of the buffer is all whitespace
/// (or off is already at/after len).
static inline int64_t skipSpaceAndTab(const char* p, int64_t off, int64_t len)
{
    while (off < len && isWhitespace(p[off]))
        ++off;

    return off < len ? off : len;
}
/// Advances past any run of CR and LF characters beginning at off.
/// Returns the offset of the first character that is neither CR nor LF,
/// or len when no such character exists before the end of the buffer.
static inline int64_t skipCRLF(const char* p, int64_t off, int64_t len)
{
    while (off < len && (p[off] == '\r' || p[off] == '\n'))
        ++off;

    return off < len ? off : len;
}
/// Locates the next line-break at or after off.
/// Lines are expected to end in CRLF, but a lone LF is accepted,
/// so only the LF is searched for.
/// Returns the offset of the first LF character found,
/// or len when there is none before the end of the buffer.
static inline int64_t findLineBreak(const char* p, int64_t off, int64_t len)
{
    while (off < len && p[off] != '\n')
        ++off;

    return off < len ? off : len;
}
/// Finds the blank line that signifies the end of a block, such as
/// a header. The line before it may end in CRLF or a lone LF, but
/// the blank line itself must be CRLF; i.e. we look for LF CR LF.
/// Returns the offset of the LF of the blank line (the second LF),
/// or len when no complete blank line is found at or after off.
/// Ex.: for [xxxCRLFCRLF] the offset to the second LF is returned.
static inline int64_t findBlankLine(const char* p, int64_t off, int64_t len)
{
    // We need three bytes (LF CR LF) starting at off for a match.
    // Advance one byte at a time: skipping further on a mismatch could
    // jump over the LF that starts the real terminator (e.g. "A\n\r\n").
    for (; off + 2 < len; ++off)
    {
        if (p[off] == '\n' && p[off + 1] == '\r' && p[off + 2] == '\n')
        {
            return off + 2; // Return the second LF.
        }
    }

    return len; // Not found.
}
/// Locates the end of the token that begins at off.
/// A token ends at the first character accepted by isWhitespace()
/// or at a LF. Returns the offset of that terminating character,
/// or len when the token runs to the end of the buffer.
static inline int64_t findEndOfToken(const char* p, int64_t off, int64_t len)
{
    while (off < len && !isWhitespace(p[off]) && p[off] != '\n')
        ++off;

    return off < len ? off : len;
}
/// Parses the HTTP header fields found at the start of [p, p + len)
/// and stores each (trimmed) name/value pair in this Header.
/// Returns:
///   > 0  the number of bytes consumed, including the blank line
///        that terminates the header;
///   0    when the header is incomplete (no terminating blank line yet);
///   -1   when the header is complete but could not be parsed.
int64_t Header::parse(const char* p, int64_t len)
{
    // Validate before touching the buffer, including for logging.
    if (p == nullptr || len < 4)
    {
        // Incomplete; we need at least \r\n\r\n.
        return 0;
    }

    LOG_TRC("Reading header given " << len << " bytes: "
                                    << std::string(p, std::min<int64_t>(len, 80)));

    // Make sure we have the full header before parsing.
    const int64_t end = findBlankLine(p, 0, len);
    if (end == len)
    {
        return 0; // Incomplete.
    }

    try
    {
        //FIXME: implement http header parser!

        // Now parse to preserve folded headers and other
        // corner cases that is conformant to the rfc,
        // detecting any errors and/or invalid entries.
        // NB: request.read() expects full message and will fail.
        Poco::Net::MessageHeader msgHeader;
        Poco::MemoryInputStream data(p, len);
        msgHeader.read(data);
        if (data.tellg() < 0)
        {
            LOG_DBG("Failed to parse http header.");
            return -1;
        }

        // Copy the header entries over to us.
        for (const auto& pair : msgHeader)
        {
            set(Util::trimmed(pair.first), Util::trimmed(pair.second));
        }

        _chunked = getTransferEncoding() == "chunked";

        LOG_TRC("Read " << data.tellg() << " bytes of header:\n"
                        << std::string(p, data.tellg())
                        << "\nhasContentLength: " << hasContentLength()
                        << ", contentLength: " << (hasContentLength() ? getContentLength() : -1)
                        << ", chunked: " << getChunkedTransferEncoding());

        // We consumed the full header, including the blank line.
        return end + 1;
    }
    catch (const Poco::Exception& exc)
    {
        LOG_ERR("ERROR while parsing http header: " << exc.displayText());
    }

    // The blank line was found, so the header is complete: more data
    // cannot fix a parse failure. Report an error rather than
    // 'incomplete', which would leave the caller waiting for more data.
    return -1;
}
/// Parses a Status Line: HTTP-version SP status-code SP reason-phrase CRLF.
/// The reason-phrase may be empty.
/// Returns the parse state. On success (Valid), len is clobbered with the
/// number of bytes read, which includes the line break(s) that follow
/// the status line. On Incomplete and Invalid, len is left unchanged.
/// Members may be partially updated when Incomplete or Invalid is returned.
FieldParseState StatusLine::parse(const char* p, int64_t& len)
{
#ifdef DEBUG_HTTP
    LOG_TRC("StatusLine::parse: " << len << " bytes available\n"
                                  << Util::dumpHex(std::string(p, std::min(len, 10 * 1024L))));
#endif //DEBUG_HTTP

    // First line is the status line.
    if (p == nullptr || len < MinStatusLineLen)
        return FieldParseState::Incomplete;

    int64_t off = skipSpaceAndTab(p, 0, len);
    if (off >= MaxStatusLineLen)
        return FieldParseState::Invalid;

    // We still expect the minimum amount of data.
    if ((len - off) < MinStatusLineLen)
        return FieldParseState::Incomplete;

    // We should have the version now.
    assert(off + VersionLen < len && "Expected to have more data.");
    const char* version = &p[off];
    constexpr int VersionMajPos = sizeof("HTTP/") - 1;
    constexpr int VersionDotPos = VersionMajPos + 1;
    constexpr int VersionMinPos = VersionDotPos + 1;
    const int versionMaj = version[VersionMajPos] - '0';
    const int versionMin = version[VersionMinPos] - '0';
    // Version may not be null-terminated.
    if (!Util::startsWith(std::string(version, VersionLen), "HTTP/")
        || (versionMaj < 0 || versionMaj > 9) || version[VersionDotPos] != '.'
        || (versionMin < 0 || versionMin > 9))
    {
        LOG_ERR("StatusLine::parse: Invalid HTTP version [" << std::string(version, VersionLen)
                                                            << "]");
        return FieldParseState::Invalid;
    }

    _httpVersion = std::string(version, VersionLen);
    _versionMajor = versionMaj;
    _versionMinor = versionMin;

    // Find the Status Code.
    off = skipSpaceAndTab(p, off + VersionLen, len);
    if (off >= MaxStatusLineLen)
        return FieldParseState::Invalid;

    // We still expect the Status Code and CRLF.
    if ((len - off) < (MinStatusLineLen - VersionLen))
        return FieldParseState::Incomplete;

    // We read StatusCodeLen digits plus the character that follows them;
    // make sure they are all within the buffer, even in release builds.
    if (off + StatusCodeLen >= len)
        return FieldParseState::Incomplete;

    // Read the Status Code now.
    // The buffer is not null-terminated, so parse exactly StatusCodeLen
    // digits by hand instead of letting atoi() scan an unbounded run.
    int statusCode = 0;
    for (int64_t i = 0; i < StatusCodeLen; ++i)
    {
        const char ch = p[off + i];
        if (ch < '0' || ch > '9')
        {
            LOG_ERR("StatusLine::parse: Invalid StatusCode ["
                    << std::string(&p[off], StatusCodeLen) << "]");
            return FieldParseState::Invalid;
        }

        statusCode = statusCode * 10 + (ch - '0');
    }

    // A further digit means the code is longer than StatusCodeLen digits.
    const char afterCode = p[off + StatusCodeLen];
    if (afterCode >= '0' && afterCode <= '9')
    {
        LOG_ERR("StatusLine::parse: Invalid StatusCode ["
                << std::string(&p[off], StatusCodeLen + 1) << "]");
        return FieldParseState::Invalid;
    }

    _statusCode = statusCode;
    if (_statusCode < MinValidStatusCode || _statusCode > MaxValidStatusCode)
    {
        LOG_ERR("StatusLine::parse: Invalid StatusCode [" << _statusCode << "]");
        return FieldParseState::Invalid;
    }

    // Find the Reason Phrase.
    // NB: this also skips a CR, so for an empty reason phrase
    // we end up directly on the LF that ends the status line.
    off = skipSpaceAndTab(p, off + StatusCodeLen, len);
    if (off >= MaxStatusLineLen)
    {
        LOG_ERR("StatusLine::parse: StatusCode is too long: " << off);
        return FieldParseState::Invalid;
    }

    const int64_t reasonOff = off;

    // Find the LF that ends the status line, without
    // scanning beyond the maximum length we accept.
    for (; off < len && p[off] != '\n'; ++off)
    {
        if (off >= MaxStatusLineLen)
        {
            LOG_ERR("StatusLine::parse: Status line is too long: " << off);
            return FieldParseState::Invalid;
        }
    }

    if (off >= len)
        return FieldParseState::Incomplete;

    // The reason phrase ends before the line break. Exclude the '\r'
    // only if there is one; the phrase may be empty and the line
    // may end with a lone LF.
    int64_t reasonEnd = off;
    if (reasonEnd > reasonOff && p[reasonEnd - 1] == '\r')
        --reasonEnd;

    _reasonPhrase = std::string(&p[reasonOff], reasonEnd - reasonOff);

    // Consume the line breaks.
    // NOTE(review): this consumes every consecutive CR/LF, which would include
    // the blank line ending an empty header block -- confirm that is intended.
    len = skipCRLF(p, off, len);
    return FieldParseState::Valid;
}
int64_t Request::readData(const char* p, const int64_t len)
{
int64_t available = len;
if (_stage == Stage::Header)
{
// First line is the status line.
if (p == nullptr || len < MinRequestHeaderLen)
{
LOG_TRC("Request::readData: len < MinRequestHeaderLen");
return 0;
}
// Verb.
int64_t off = skipSpaceAndTab(p, 0, available);
int64_t end = findEndOfToken(p, off, available);
if (end == available)
{
// Incomplete data.
return 0;
}
_verb = std::string(&p[off], end - off);
// URL.
off = skipSpaceAndTab(p, end, available);
end = findEndOfToken(p, off, available);
if (end == available)
{
// Incomplete data.
return 0;
}
_url = std::string(&p[off], end - off);
// Version.
off = skipSpaceAndTab(p, end, available);
if (off + VersionLen >= available)
{
// Incomplete data.
return 0;
}
// We should have the version now.
assert(off + VersionLen < available && "Expected to have more data.");
const char* version = &p[off];
constexpr int VersionMajPos = sizeof("HTTP/") - 1;
constexpr int VersionDotPos = VersionMajPos + 1;
constexpr int VersionMinPos = VersionDotPos + 1;
const int versionMaj = version[VersionMajPos] - '0';
const int versionMin = version[VersionMinPos] - '0';
// Version may not be null-terminated.
if (!Util::startsWith(std::string(version, VersionLen), "HTTP/")
|| (versionMaj < 0 || versionMaj > 9) || version[VersionDotPos] != '.'
|| (versionMin < 0 || versionMin > 9))
{
LOG_ERR("Request::dataRead: Invalid HTTP version [" << std::string(version, VersionLen)
<< "]");
return -1;
}
_version = std::string(version, VersionLen);
off += VersionLen;
end = findLineBreak(p, off, available);
if (end >= available)
{
// Incomplete data.
return 0;
}
++end; // Skip the LF character.
// LOG_TRC("performWrites (header): " << headerStr.size() << ": " << headerStr);
_stage = Stage::Body;
p += end;
available -= end;
}
if (_stage == Stage::Body)
{
const int64_t read = _header.parse(p, available);
if (read < 0)
{
// _state = State::Error;
return read;
}
if (read > 0)
{
available -= read;
p += read;
#ifdef DEBUG_HTTP
LOG_TRC("After Header: "
<< available << " bytes availble\n"
<< Util::dumpHex(std::string(p, std::min(available, 1 * 1024L))));
#endif //DEBUG_HTTP
}
if (_verb == VERB_GET)
{
// A payload in a GET request "has no defined semantics".
return len - available;
}
else
{
// TODO: Implement POST and HEAD support.
LOG_ERR("Unsupported HTTP Method [" << _verb << ']');
return -1;
}
}
return len - available;
}
/// Handles incoming response data from [p, p + len).
/// Drives the parser through its stages: status line, header, body
/// (chunked or not), and calls complete() once the Finished stage is reached.
/// Body bytes are handed to the body-write callback.
/// Returns the number of bytes consumed (0 when more data is needed
/// before progress can be made), or a negative value for error
/// and/or to interrupt transmission.
int64_t Response::readData(const char* p, int64_t len)
{
    LOG_TRC("readData: " << len << " bytes");

    // We got some data.
    _state = State::Incomplete;

    int64_t available = len;

    if (_parserStage == ParserStage::StatusLine)
    {
        int64_t read = available;
        switch (_statusLine.parse(p, read))
        {
            case FieldParseState::Unknown:
            case FieldParseState::Incomplete:
                return 0;
            case FieldParseState::Invalid:
                _state = State::Error;
                return -1;
            case FieldParseState::Valid:
                if (read <= 0)
                    return read; // Unexpected, really.

                //FIXME: Don't consume what we read until we have our header parser.
                // available -= read;
                // p += read;
                _parserStage = ParserStage::Header;
                break;
        }
    }

    if (_parserStage == ParserStage::Header && available)
    {
        const int64_t read = _header.parse(p, available);
        if (read < 0)
        {
            _state = State::Error;
            return read;
        }

        if (read > 0)
        {
            available -= read;
            p += read;

#ifdef DEBUG_HTTP
            LOG_TRC("After Header: "
                    << available << " bytes available\n"
                    << Util::dumpHex(std::string(p, std::min(available, 1 * 1024L))));
#endif //DEBUG_HTTP

            // Assume we have a body unless we have reason to expect otherwise.
            _parserStage = ParserStage::Body;

            if (_statusLine.statusCategory() == StatusLine::StatusCodeClass::Informational
                || _statusLine.statusCode() == 204 /*No Content*/
                || _statusLine.statusCode() == 304 /*Not Modified*/) // || HEAD request
            // || 2xx on CONNECT request
            {
                // No body, we are done.
                _parserStage = ParserStage::Finished;
            }
            else
            {
                // We can possibly have a body.
                if (_statusLine.statusCategory() != StatusLine::StatusCodeClass::Successful)
                {
                    // Failed: Store the body (if any) in memory.
                    saveBodyToMemory();
                }

                if (_header.hasContentLength())
                {
                    if (_header.getContentLength() < 0 || !_header.getTransferEncoding().empty())
                    {
                        // Invalid Content-Length or have Transfer-Encoding too.
                        // 3.3.2. Content-Length
                        // A sender MUST NOT send a Content-Length header field in any message
                        // that contains a Transfer-Encoding header field.
                        LOG_ERR("Unexpected Content-Length header in response: "
                                << _header.getContentLength()
                                << ", Transfer-Encoding: " << _header.getTransferEncoding());
                        // NOTE(review): complete() is still invoked below for the
                        // Finished stage -- confirm it preserves State::Error.
                        _state = State::Error;
                        _parserStage = ParserStage::Finished;
                    }
                    else if (_header.getContentLength() == 0)
                        _parserStage = ParserStage::Finished; // No body, we are done.
                }
            }
        }
    }

    if (_parserStage == ParserStage::Body && available)
    {
        LOG_TRC("ParserStage::Body: " << available);

        if (_header.getChunkedTransferEncoding())
        {
            // This is a chunked transfer.
            // Find the start of the chunk, which is
            // the length of the chunk in hex.
            // Each chunk is preceded by its length in hex.

            // The largest chunk length we accept; leaves room
            // to add the trailing CRLF without overflowing.
            constexpr int64_t MaxChunkLen = INT64_MAX - 2;

            while (available)
            {
#ifdef DEBUG_HTTP
                LOG_TRC("New Chunk, "
                        << available << " bytes available\n"
                        << Util::dumpHex(std::string(p, std::min(available, 10 * 1024L))));
#endif //DEBUG_HTTP

                // Read ahead to see if we have enough data
                // to consume the chunk length.
                int64_t off = findLineBreak(p, 0, available);
                if (off == available)
                {
                    LOG_TRC("Not enough data for chunk size");
                    // Not enough data.
                    return len - available; // Don't remove.
                }

                ++off; // Skip the LF itself.

                // Read the chunk length. The digits are on the chunk-size
                // line, i.e. within the first off bytes; the LF ends the scan.
                int64_t chunkLen = 0;
                int64_t chunkLenSize = 0;
                for (; chunkLenSize < off; ++chunkLenSize)
                {
                    const int digit = Util::hexDigitFromChar(p[chunkLenSize]);
                    if (digit < 0)
                        break;

                    // The length comes from the network; refuse
                    // values that would overflow instead of wrapping.
                    if (chunkLen > (MaxChunkLen - digit) / 16)
                    {
                        LOG_ERR("Invalid chunk size in http response: too large");
                        _state = State::Error;
                        return -1;
                    }

                    chunkLen = chunkLen * 16 + digit;
                }

                if (chunkLenSize == 0)
                {
                    // No hex digits at all; this is not a chunk-size line.
                    // Treating it as the last chunk would silently truncate the body.
                    LOG_ERR("Invalid chunk size in http response: no hex digits found");
                    _state = State::Error;
                    return -1;
                }

                LOG_TRC("ChunkLen: " << chunkLen);
                if (chunkLen > 0)
                {
                    // Do we have enough data for this chunk?
                    if (available - off < chunkLen + 2) // + CRLF.
                    {
                        // Not enough data.
                        LOG_TRC("Not enough chunk data. Need " << chunkLen + 2 << " but have only "
                                                               << available - off);
                        return len - available; // Don't remove.
                    }

                    // Skip the chunkLen bytes and any chunk extensions.
                    available -= off;
                    p += off;

                    const int64_t read = _onBodyWriteCb(p, chunkLen);
                    if (read != chunkLen)
                    {
                        LOG_ERR("Error writing http response payload. Write "
                                "handler returned "
                                << read << " instead of " << chunkLen);
                        _state = State::Error;
                        return -1;
                    }

                    available -= chunkLen;
                    p += chunkLen;
                    _recvBodySize += chunkLen;
                    LOG_TRC("Wrote " << chunkLen << " bytes for a total of " << _recvBodySize);

                    // Skip blank lines.
                    off = skipCRLF(p, 0, available);
                    p += off;
                    available -= off;
                }
                else
                {
                    // That was the last chunk!
                    _parserStage = ParserStage::Finished;
                    available = 0; // Consume all.
                    LOG_TRC("Got LastChunk, finished.");
                    break;
                }
            }
        }
        else
        {
            // Non-chunked payload.
            // Write the body into the output, returns the
            // number of bytes read from the given buffer.
            const int64_t wrote = _onBodyWriteCb(p, available);
            if (wrote < 0)
            {
                LOG_ERR("Error writing http response payload. Write handler returned "
                        << wrote << " instead of " << available);
                _state = State::Error;
                return wrote;
            }

            if (wrote > 0)
            {
                available -= wrote;
                _recvBodySize += wrote;
                if (_header.hasContentLength() && _recvBodySize >= _header.getContentLength())
                {
                    LOG_TRC("Wrote all content, finished.");
                    _parserStage = ParserStage::Finished;
                }
            }
        }
    }

    if (_parserStage == ParserStage::Finished)
    {
        complete();
    }

    LOG_TRC("Done consuming response, had " << len << " bytes, consumed " << len - available
                                            << " leaving " << available << " unused.");
    return len - available;
}
} // namespace http
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */