Filter option for extract document structure

Example use of filter:
curl -F "data=@docStructure2.docx" -F "filter=contentcontrol" http://localhost:9980/cool/extract-document-structure > out.json

Content controls are extracted only if filter=contentcontrol is used,
or if no filter is used.
Using no filter means everything is extracted.

Signed-off-by: Attila Szűcs <attila.szucs@collabora.com>
Change-Id: I001a2cc525be7502d53b5849bb61a99d087ed807
This commit is contained in:
Attila Szűcs 2024-07-11 12:11:19 +02:00 committed by Szymon Kłos
parent 09e8e1bf79
commit 0d411ed1f5
4 changed files with 20 additions and 6 deletions

View file

@ -347,7 +347,14 @@ bool ChildSession::_handleInput(const char *buffer, int length)
assert(!getDocURL().empty());
assert(!getJailedFilePath().empty());
char* data = _docManager->getLOKit()->extractDocumentStructureRequest(getJailedFilePath().c_str());
std::string filter;
if (tokens.size() > 2)
{
getTokenString(tokens[2], "filter", filter);
}
char* data = _docManager->getLOKit()->extractDocumentStructureRequest(getJailedFilePath().c_str(),
filter.c_str());
if (!data)
{
LOG_TRC("extractDocumentStructureRequest returned no data.");

View file

@ -360,7 +360,7 @@ getConvertToBrokerImplementation(const std::string& requestType, const std::stri
const Poco::URI& uriPublic, const std::string& docKey,
const std::string& format, const std::string& options,
const std::string& lang, const std::string& target,
const std::string& transformJSON)
const std::string& filter, const std::string& transformJSON)
{
if (requestType == "convert-to")
return std::make_shared<ConvertToBroker>(fromPath, uriPublic, docKey, format, options,
@ -368,7 +368,8 @@ getConvertToBrokerImplementation(const std::string& requestType, const std::stri
else if (requestType == "extract-link-targets")
return std::make_shared<ExtractLinkTargetsBroker>(fromPath, uriPublic, docKey, lang);
else if (requestType == "extract-document-structure")
return std::make_shared<ExtractDocumentStructureBroker>(fromPath, uriPublic, docKey, lang);
return std::make_shared<ExtractDocumentStructureBroker>(fromPath, uriPublic, docKey, lang,
filter);
else if (requestType == "transform-document-structure")
{
if (format.empty())
@ -1481,6 +1482,7 @@ void ClientRequestDispatcher::handlePostRequest(const RequestDetails& requestDet
std::string lang = (form.has("lang") ? form.get("lang") : std::string());
std::string target = (form.has("target") ? form.get("target") : std::string());
std::string filter = (form.has("filter") ? form.get("filter") : std::string());
std::string encodedTransformJSON;
if (form.has("transform"))
@ -1496,7 +1498,7 @@ void ClientRequestDispatcher::handlePostRequest(const RequestDetails& requestDet
LOG_DBG("New DocumentBroker for docKey [" << docKey << "].");
auto docBroker = getConvertToBrokerImplementation(
requestDetails[1], fromPath, uriPublic, docKey, format, options, lang, target,
encodedTransformJSON);
filter, encodedTransformJSON);
handler.takeFile();
cleanupDocBrokers();

View file

@ -4256,7 +4256,9 @@ void ExtractDocumentStructureBroker::sendStartMessage(const std::shared_ptr<Clie
{
ConvertToBroker::sendStartMessage(clientSession, encodedFrom);
const auto command = "extractdocumentstructure url=" + encodedFrom;
std::string command = "extractdocumentstructure url=" + encodedFrom;
if (!_filter.empty())
command += " filter=" + _filter;
forwardToChild(clientSession, command);
}

View file

@ -1757,13 +1757,16 @@ private:
class ExtractDocumentStructureBroker final : public ConvertToBroker
{
public:
const std::string _filter;
/// Construct DocumentBroker with URI and docKey
ExtractDocumentStructureBroker(const std::string& uri,
const Poco::URI& uriPublic,
const std::string& docKey,
const std::string& lang)
const std::string& lang,
const std::string& filter)
: ConvertToBroker(uri, uriPublic, docKey, Poco::Path(uri).getExtension(), "",
lang)
, _filter(filter)
{}
private: