office-gobmx/include/xmlreader/xmlreader.hxx
Norbert Thiebaud 4ad3ed74ea fdo#65108 inter-module includes <> include/xmlreader
Change-Id: I5c04e9169518212ec6a9cbb6d07216a87a2d9cdb
2013-11-09 18:48:44 -06:00

193 lines
5.8 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#ifndef INCLUDED_XMLREADER_XMLREADER_HXX
#define INCLUDED_XMLREADER_XMLREADER_HXX
#include <sal/config.h>
#include <stack>
#include <vector>
#include <boost/noncopyable.hpp>
#include <com/sun/star/container/NoSuchElementException.hpp>
#include <com/sun/star/uno/RuntimeException.hpp>
#include <osl/file.h>
#include <rtl/ustring.hxx>
#include <sal/types.h>
#include <xmlreader/detail/xmlreaderdllapi.hxx>
#include <xmlreader/pad.hxx>
#include <xmlreader/span.hxx>
namespace xmlreader {
class OOO_DLLPUBLIC_XMLREADER XmlReader: private boost::noncopyable {
public:
explicit XmlReader(OUString const & fileUrl)
SAL_THROW((
com::sun::star::container::NoSuchElementException,
com::sun::star::uno::RuntimeException));
~XmlReader();
enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 };
enum Text { TEXT_NONE, TEXT_RAW, TEXT_NORMALIZED };
enum Result { RESULT_BEGIN, RESULT_END, RESULT_TEXT, RESULT_DONE };
int registerNamespaceIri(Span const & iri);
// RESULT_BEGIN: data = localName, ns = ns
// RESULT_END: data, ns unused
// RESULT_TEXT: data = text, ns unused
Result nextItem(Text reportText, Span * data, int * nsId);
bool nextAttribute(int * nsId, Span * localName);
// the span returned by getAttributeValue is only valid until the next call
// to nextItem or getAttributeValue
Span getAttributeValue(bool fullyNormalize);
int getNamespaceId(Span const & prefix) const;
OUString getUrl() const;
private:
typedef std::vector< Span > NamespaceIris;
// If NamespaceData (and similarly ElementData and AttributeData) is made
// SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
// "'xmlreader::XmlReader' declared with greater visibility than the type of
// its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
// elements_ and attributes_):
struct NamespaceData {
Span prefix;
int nsId;
NamespaceData() {}
NamespaceData(Span const & thePrefix, int theNsId):
prefix(thePrefix), nsId(theNsId) {}
};
typedef std::vector< NamespaceData > NamespaceList;
struct ElementData {
Span name;
NamespaceList::size_type inheritedNamespaces;
int defaultNamespaceId;
ElementData(
Span const & theName,
NamespaceList::size_type theInheritedNamespaces,
int theDefaultNamespaceId):
name(theName), inheritedNamespaces(theInheritedNamespaces),
defaultNamespaceId(theDefaultNamespaceId)
{}
};
typedef std::stack< ElementData > ElementStack;
struct AttributeData {
char const * nameBegin;
char const * nameEnd;
char const * nameColon;
char const * valueBegin;
char const * valueEnd;
AttributeData(
char const * theNameBegin, char const * theNameEnd,
char const * theNameColon, char const * theValueBegin,
char const * theValueEnd):
nameBegin(theNameBegin), nameEnd(theNameEnd),
nameColon(theNameColon), valueBegin(theValueBegin),
valueEnd(theValueEnd)
{}
};
typedef std::vector< AttributeData > Attributes;
enum State {
STATE_CONTENT, STATE_START_TAG, STATE_END_TAG, STATE_EMPTY_ELEMENT_TAG,
STATE_DONE };
SAL_DLLPRIVATE inline char read() { return pos_ == end_ ? '\0' : *pos_++; }
SAL_DLLPRIVATE inline char peek() { return pos_ == end_ ? '\0' : *pos_; }
SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
SAL_DLLPRIVATE void skipSpace();
SAL_DLLPRIVATE bool skipComment();
SAL_DLLPRIVATE void skipProcessingInstruction();
SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
SAL_DLLPRIVATE Span scanCdataSection();
SAL_DLLPRIVATE bool scanName(char const ** nameColon);
SAL_DLLPRIVATE int scanNamespaceIri(
char const * begin, char const * end);
SAL_DLLPRIVATE char const * handleReference(
char const * position, char const * end);
SAL_DLLPRIVATE Span handleAttributeValue(
char const * begin, char const * end, bool fullyNormalize);
SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
SAL_DLLPRIVATE Result handleEndTag();
SAL_DLLPRIVATE void handleElementEnd();
SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
SAL_DLLPRIVATE Result handleRawText(Span * text);
SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos);
OUString fileUrl_;
oslFileHandle fileHandle_;
sal_uInt64 fileSize_;
void * fileAddress_;
NamespaceIris namespaceIris_;
NamespaceList namespaces_;
ElementStack elements_;
char const * pos_;
char const * end_;
State state_;
Attributes attributes_;
Attributes::iterator currentAttribute_;
bool firstAttribute_;
Pad pad_;
};
}
#endif
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */