Related: tdf#114428 svtools HTML import: avoid XML declaration in body text

Just ignore the XML declaration for now.

Change-Id: Idf82af611370d957c6704cce250941a8a0b90637
Reviewed-on: https://gerrit.libreoffice.org/46388
Tested-by: Jenkins <ci@libreoffice.org>
Reviewed-by: Miklos Vajna <vmiklos@collabora.co.uk>
This commit is contained in:
Miklos Vajna 2017-12-13 14:46:26 +01:00
parent 14daba5bd0
commit 3fe64261b5
3 changed files with 73 additions and 1 deletions

View file

@ -14,6 +14,7 @@ $(eval $(call gb_CppunitTest_use_external,svtools_html,boost_headers))
$(eval $(call gb_CppunitTest_use_sdk_api,svtools_html))
$(eval $(call gb_CppunitTest_add_exception_objects,svtools_html, \
svtools/qa/unit/testHtmlReader \
svtools/qa/unit/testHtmlWriter \
))

View file

@ -0,0 +1,70 @@
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
*/
#include <cppunit/TestFixture.h>
#include <cppunit/extensions/HelperMacros.h>
#include <com/sun/star/document/XDocumentProperties.hpp>
#include <svtools/parhtml.hxx>
#include <tools/ref.hxx>
#include <tools/stream.hxx>
namespace
{
/// Subclass of HTMLParser that can sense the import result.
///
/// The text of every TEXTTOKEN passed to NextToken() is appended to
/// m_aDocument, so a test can inspect what text content was reported.
class TestHTMLParser : public HTMLParser
{
public:
    /// Builds a parser that reads from rStream.
    /// Marked explicit so an SvStream is never silently converted into a parser.
    explicit TestHTMLParser(SvStream& rStream);

    /// Override of the base-class token hook: records text tokens, ignores all others.
    virtual void NextToken(HtmlTokenId nToken) override;

    /// Concatenation of all text tokens seen so far.
    OUString m_aDocument;
};
/// Hands the input stream to the HTMLParser base class; nothing else is set up here.
TestHTMLParser::TestHTMLParser(SvStream& rStream)
    : HTMLParser(rStream)
{
}
/// Appends the current token text to m_aDocument when nToken is a TEXTTOKEN;
/// every other token kind is ignored.
void TestHTMLParser::NextToken(HtmlTokenId nToken)
{
    // Only plain text contributes to the collected document.
    if (nToken != HtmlTokenId::TEXTTOKEN)
        return;

    // aToken is not declared in this file; presumably the base parser's
    // current token text -- confirm against the HTMLParser/SvParser headers.
    m_aDocument += aToken;
}
/// Tests HTMLParser.
class Test : public CppUnit::TestFixture
{
public:
/// Checks that an XML declaration in front of the HTML does not end up in the parsed text.
void testTdf114428();
// CppUnit suite declaration: lists the test methods that belong to this fixture.
CPPUNIT_TEST_SUITE(Test);
CPPUNIT_TEST(testTdf114428);
CPPUNIT_TEST_SUITE_END();
};
void Test::testTdf114428()
{
SvMemoryStream aStream;
OString aDocument("<?xml version=\"1.0\" encoding=\"utf-8\"?>\n<html>hello</html>");
aStream.WriteBytes(aDocument.getStr(), aDocument.getLength());
aStream.Seek(0);
tools::SvRef<TestHTMLParser> xParser = new TestHTMLParser(aStream);
xParser->CallParser();
// This was '<?xml version="1.0" encoding="utf-8"?> hello', XML declaration
// was not ignored.
CPPUNIT_ASSERT_EQUAL(OUString("hello"), xParser->m_aDocument.trim());
}
// Registers the Test fixture's suite with CppUnit -- presumably so the test runner discovers it.
CPPUNIT_TEST_SUITE_REGISTRATION(Test);
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */

View file

@ -1042,7 +1042,8 @@ HtmlTokenId HTMLParser::GetNextToken_()
bOffState = true;
nNextCh = GetNextChar();
}
if( rtl::isAsciiAlpha( nNextCh ) || '!'==nNextCh )
// Assume '<?' is the start of an XML declaration, ignore it.
if (rtl::isAsciiAlpha(nNextCh) || nNextCh == '!' || nNextCh == '?')
{
OUStringBuffer sTmpBuffer;
do {