6af59644d6
Please find attached a partial fix for Easy Hack FDO43460 Part XXI Module oox
404 lines
16 KiB
C++
404 lines
16 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*************************************************************************
|
|
*
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* Copyright 2000, 2010 Oracle and/or its affiliates.
|
|
*
|
|
* OpenOffice.org - a multi-platform office productivity suite
|
|
*
|
|
* This file is part of OpenOffice.org.
|
|
*
|
|
* OpenOffice.org is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License version 3
|
|
* only, as published by the Free Software Foundation.
|
|
*
|
|
* OpenOffice.org is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License version 3 for more details
|
|
* (a copy is included in the LICENSE file that accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* version 3 along with OpenOffice.org. If not, see
|
|
* <http://www.openoffice.org/license.html>
|
|
* for a copy of the LGPLv3 License.
|
|
*
|
|
************************************************************************/
|
|
|
|
#include "oox/vml/vmlinputstream.hxx"
|
|
|
|
#include <com/sun/star/io/XTextInputStream.hpp>
|
|
#include <map>
|
|
#include <string.h>
|
|
#include <rtl/strbuf.hxx>
|
|
#include "oox/helper/helper.hxx"
|
|
#include "oox/helper/textinputstream.hxx"
|
|
|
|
namespace oox {
|
|
namespace vml {
|
|
|
|
// ============================================================================
|
|
|
|
using namespace ::com::sun::star::io;
|
|
using namespace ::com::sun::star::uno;
|
|
|
|
using ::rtl::OString;
|
|
using ::rtl::OStringBuffer;
|
|
|
|
// ============================================================================
|
|
|
|
namespace {
|
|
|
|
/** Searches the character range [pcBeg, pcEnd) for cChar.

    @return  Pointer to the position reported by
        rtl_str_indexOfChar_WithLength(), or pcEnd if that function reports
        a negative index (character not found).
 */
inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar )
{
    const sal_Int32 nLength = static_cast< sal_Int32 >( pcEnd - pcBeg );
    const sal_Int32 nFound = rtl_str_indexOfChar_WithLength( pcBeg, nLength, cChar );
    if( nFound < 0 )
        return pcEnd;
    return pcBeg + nFound;
}
|
|
|
|
/** Returns true for the control characters 0x00...0x1F (tab, LF, CR, ...).

    The comparison is done on the unsigned byte value. Plain 'char' may be a
    signed type, in which case every byte >= 0x80 (for example the NBSP
    character 0xA0, or any byte of a multi-byte UTF-8 sequence) is negative
    and would wrongly compare as "less than 32".

    NOTE(review): the regular space character (32) is intentionally left
    unclassified here to keep the previous behaviour - confirm that this is
    intended for attribute parsing as well.
 */
inline bool lclIsWhiteSpace( sal_Char cChar )
{
    return static_cast< unsigned char >( cChar ) < 32;
}
|
|
|
|
/** Returns the position of the first character in [pcBeg, pcEnd) for which
    lclIsWhiteSpace() is true, or pcEnd if there is no such character. */
const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
{
    const sal_Char* pcPos = pcBeg;
    while( pcPos < pcEnd )
    {
        if( lclIsWhiteSpace( *pcPos ) )
            return pcPos;
        ++pcPos;
    }
    return pcEnd;
}
|
|
|
|
/** Returns the position of the first character in [pcBeg, pcEnd) for which
    lclIsWhiteSpace() is false, or pcEnd if all characters are whitespace. */
const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
{
    const sal_Char* pcPos = pcBeg;
    while( pcPos < pcEnd )
    {
        if( !lclIsWhiteSpace( *pcPos ) )
            return pcPos;
        ++pcPos;
    }
    return pcEnd;
}
|
|
|
|
/** Moves the end of the range [pcBeg, pcEnd) backwards over all trailing
    characters for which lclIsWhiteSpace() is true.

    @return  The new end position; equals pcBeg if the entire range consists
        of whitespace.
 */
const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd )
{
    const sal_Char* pcTrimmedEnd = pcEnd;
    for( ; pcBeg < pcTrimmedEnd; --pcTrimmedEnd )
        if( !lclIsWhiteSpace( *(pcTrimmedEnd - 1) ) )
            break;
    return pcTrimmedEnd;
}
|
|
|
|
/** Appends the characters of the range [pcBeg, pcEnd) to the passed buffer. */
inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
{
    const sal_Int32 nCharCount = static_cast< sal_Int32 >( pcEnd - pcBeg );
    rBuffer.append( pcBeg, nCharCount );
}
|
|
|
|
// ----------------------------------------------------------------------------
|
|
|
|
/** Parses the attribute list [pcBeg, pcEnd) of an XML element, removes
    repeated attributes (the last definition of a name wins), and appends the
    remaining attributes to rBuffer, each one preceded by a space character.

    Every attribute must have the form  name = 'value'  or  name = "value"
    and must be separated from the next attribute by at least one character
    accepted by lclIsWhiteSpace(). If the list cannot be parsed completely,
    the entire passed range is appended unchanged, preceded by a single space
    character.

    @param rBuffer  Output buffer; expected to already contain the opening
        bracket and the element name.
    @param pcBeg  First character of the first attribute name.
    @param pcEnd  End of the attribute list (exclusive).
 */
void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
{
    /*  Map attribute names to char-pointer of all attributes. This map is used
        to find multiple occurrences of attributes with the same name. The
        mapped pointers are used as map key in the next map below. */
    typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
    AttributeNameMap aAttributeNames;

    /*  Map the char-pointers of all attributes to the full attribute definition
        string. This preserves the original order of the used attributes. */
    typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
    AttributeDataMap aAttributes;

    bool bOk = true;
    const sal_Char* pcNameBeg = pcBeg;
    while( bOk && (pcNameBeg < pcEnd) )
    {
        // assume failure until the current attribute has been parsed completely
        bOk = false;

        // pcNameBeg points to begin of attribute name, find equality sign
        const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
        if( pcEqualSign >= pcEnd )
            break;

        // find end of attribute name (ignore whitespace between name and equality sign)
        const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
        if( pcNameEnd <= pcNameBeg )
            break;

        // find begin of attribute value (must be single or double quote)
        const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
        if( (pcValueBeg >= pcEnd) || ((*pcValueBeg != '\'') && (*pcValueBeg != '"')) )
            break;

        // find end of attribute value (matching quote character)
        const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
        if( pcValueEnd >= pcEnd )
            break;
        ++pcValueEnd;   // include the closing quote

        OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
        OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) );

        // an earlier attribute with the same name is superseded: drop its definition
        AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
        if( aIt != aAttributeNames.end() )
            aAttributes.erase( aIt->second );

        // insert the attribute into both maps
        aAttributeNames[ aAttribName ] = pcNameBeg;
        aAttributes[ pcNameBeg ] = aAttribData;

        // continue with next attribute; anything following must start with whitespace
        bOk = true;
        pcNameBeg = pcValueEnd;
        if( pcNameBeg < pcEnd )
        {
            bOk = lclIsWhiteSpace( *pcNameBeg );
            if( bOk )
                pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
        }
    }

    if( bOk )
    {
        // no error has occurred: build the resulting attribute list
        for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
        {
            rBuffer.append( ' ' );
            rBuffer.append( aIt->second );
        }
    }
    else
    {
        /*  On error, append the complete passed string. As in the regular
            case above, a separating space is needed, otherwise the text
            would be glued directly to whatever the buffer already contains
            (the element name). */
        rBuffer.append( ' ' );
        lclAppendToBuffer( rBuffer, pcBeg, pcEnd );
    }
}
|
|
|
|
/** Processes the text of one XML element (including its angle brackets) and
    appends the result to rBuffer.

    - Text that is not enclosed in '<' and '>' is appended unchanged.
    - Parser instructions of the form '<![...]>' are dropped.
    - A '<br>' element is replaced by a single LF character.
    - End elements ('</...>') are appended unchanged.
    - For all other elements, the element name is appended and the attribute
      list is passed through lclProcessAttribs().

    @param rBuffer  Output buffer receiving the processed element.
    @param rElement  Text of the element; an empty string is ignored.
 */
void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement )
{
    const sal_Int32 nLength = rElement.getLength();
    if( nLength == 0 )
        return;

    const sal_Char* const pcFirst = rElement.getStr();
    const sal_Char* const pcLast = pcFirst + nLength - 1;

    // no complete element found: just append all passed characters
    if( (pcFirst >= pcLast) || (*pcFirst != '<') || (*pcLast != '>') )
    {
        rBuffer.append( rElement );
        return;
    }

    // skip parser instructions: '<![...]>'
    if( (nLength >= 5) && (pcFirst[ 1 ] == '!') && (pcFirst[ 2 ] == '[') && (pcLast[ -1 ] == ']') )
        return;

    // replace '<br>' element with newline
    if( (nLength >= 4) && (pcFirst[ 1 ] == 'b') && (pcFirst[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcFirst + 3, pcLast ) == pcLast) )
    {
        rBuffer.append( '\n' );
        return;
    }

    // append end elements without further processing
    if( pcFirst[ 1 ] == '/' )
    {
        rBuffer.append( rElement );
        return;
    }

    // start elements and simple elements: check for repeated attributes

    // text content inside the brackets, excluding '/' in '<simpleelement/>'
    const bool bSelfClosing = pcLast[ -1 ] == '/';
    const sal_Char* const pcInnerBeg = pcFirst + 1;
    const sal_Char* const pcInnerEnd = bSelfClosing ? (pcLast - 1) : pcLast;

    // append opening bracket and element name to buffer
    const sal_Char* const pcNameEnd = lclFindWhiteSpace( pcInnerBeg, pcInnerEnd );
    lclAppendToBuffer( rBuffer, pcFirst, pcNameEnd );

    // find begin of attributes, and process all attributes
    const sal_Char* const pcAttribs = lclFindNonWhiteSpace( pcNameEnd, pcInnerEnd );
    if( pcAttribs < pcInnerEnd )
        lclProcessAttribs( rBuffer, pcAttribs, pcInnerEnd );

    // close the element
    if( bSelfClosing )
        rBuffer.append( '/' );
    rBuffer.append( '>' );
}
|
|
|
|
/** Appends the character data preceding an XML element to rBuffer, with
    normalized whitespace.

    Leading whitespace (as classified by lclIsWhiteSpace()) is dropped, and
    every following run of whitespace is replaced by one space character.

    @param rBuffer  Output buffer receiving the processed characters.
    @param rChars  Character data, optionally ending with the opening bracket
        '<' of the next element. The bracket is not appended to the buffer.
    @return  True, if rChars ended with an opening bracket.
 */
bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars )
{
    /*  Background on the whitespace handling of MSO: The stream may contain
        lots of whitespace characters which have to be handled as a single
        space character. This is done in this function.

        If the element text contains a literal line break, it will be stored
        as <br> tag (without matching </br> element). This input stream
        wrapper replaces that element with a literal LF character (see
        lclProcessElement()).

        A single space character for its own is stored as is. Example: The
        element
            <font> </font>
        represents a single space character. The XML parser will ignore this
        space character completely without issuing a 'characters' event. The
        VML import filter implementation has to react on this case manually.

        A single space character following another character is stored
        literally and must not be stripped away here. Example: The element
            <font>abc </font>
        contains the three letters a, b, and c, followed by a space character.

        Consecutive space characters, or a leading single space character,
        are stored in a <span> element. If there are N space characters
        (N > 1), then the <span> element contains exactly (N-1) NBSP
        (non-breaking space) characters, followed by a regular space
        character. Examples: The element
            <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
        represents 4 consecutive space characters. The element
            <font><span style='mso-spacerun:yes'> abc</span></font>
        represents a space character followed by the letters a, b, c. These
        strings have to be handled by the VML import filter implementation.
     */

    // the passed string may end with the opening bracket of an XML element
    const sal_Char* const pcFirst = rChars.getStr();
    const sal_Char* pcLimit = pcFirst + rChars.getLength();
    const bool bEndsWithBracket = (pcFirst < pcLimit) && (*(pcLimit - 1) == '<');
    if( bEndsWithBracket )
        --pcLimit;

    // start behind the leading whitespace, then copy one non-whitespace run per iteration
    for( const sal_Char* pcRunBeg = lclFindNonWhiteSpace( pcFirst, pcLimit ); pcRunBeg < pcLimit; )
    {
        const sal_Char* const pcRunEnd = lclFindWhiteSpace( pcRunBeg + 1, pcLimit );
        lclAppendToBuffer( rBuffer, pcRunBeg, pcRunEnd );
        // whitespace following the run is represented by exactly one space
        if( pcRunEnd < pcLimit )
            rBuffer.append( ' ' );
        pcRunBeg = lclFindNonWhiteSpace( pcRunEnd, pcLimit );
    }

    return bEndsWithBracket;
}
|
|
|
|
} // namespace
|
|
|
|
// ============================================================================
|
|
|
|
/** Wraps the passed input stream into a text input stream and prepares the
    delimiters used to split the stream into character data and elements.

    The text stream uses the single-byte ISO-8859-1 encoding, which maps all
    byte values to the first 256 Unicode characters.
 */
InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) :
    mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
    maOpeningBracket( 1 ),
    maClosingBracket( 1 ),
    maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
    maClosingCData( CREATE_OSTRING( "]]>" ) ),
    mnBufferPos( 0 )
{
    // both delimiter sequences consist of exactly one bracket character
    maClosingBracket[ 0 ] = '>';
    maOpeningBracket[ 0 ] = '<';
}
|
|
|
|
/** Nothing to release explicitly in the destructor body. */
InputStream::~InputStream()
{
}
|
|
|
|
/** Reads up to nBytesToRead bytes of the processed stream into rData.

    rData is resized to the number of bytes actually read. Fewer bytes than
    requested are returned only when the end of the processed data has been
    reached.

    @param rData  Receives the bytes read.
    @param nBytesToRead  Number of bytes requested; must not be negative.
    @return  The number of bytes stored in rData.
    @throws IOException  if nBytesToRead is negative.
 */
sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead )
        throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
{
    if( nBytesToRead < 0 )
        throw IOException();

    rData.realloc( nBytesToRead );
    sal_Int8* pcDest = rData.getArray();
    sal_Int32 nRet = 0;
    /*  Do not test mxTextStrm->isEOF() here: the text stream may already be
        at its end while maBuffer still holds processed data that has not
        been delivered yet. updateBuffer() refills the buffer only if it is
        exhausted and the text stream has more data. */
    while( nBytesToRead > 0 )
    {
        updateBuffer();
        const sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
        // nothing left in the buffer after the update: end of data reached
        if( nReadSize <= 0 )
            break;
        memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
        mnBufferPos += nReadSize;
        nBytesToRead -= nReadSize;
        nRet += nReadSize;
    }
    // shrink the sequence to the number of bytes actually read
    if( nRet < rData.getLength() )
        rData.realloc( nRet );
    return nRet;
}
|
|
|
|
/** Reads up to nMaxBytesToRead bytes; simply forwards to readBytes(). */
sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead )
        throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
{
    const sal_Int32 nBytesRead = readBytes( rData, nMaxBytesToRead );
    return nBytesRead;
}
|
|
|
|
/** Skips up to nBytesToSkip bytes of the processed stream.

    Skipping stops early when the end of the processed data has been reached.

    @param nBytesToSkip  Number of bytes to skip; must not be negative.
    @throws IOException  if nBytesToSkip is negative.
 */
void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip )
        throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
{
    if( nBytesToSkip < 0 )
        throw IOException();

    /*  Do not test mxTextStrm->isEOF() here: the text stream may already be
        at its end while maBuffer still holds processed data that has to be
        skipped. updateBuffer() refills the buffer only if it is exhausted
        and the text stream has more data. */
    while( nBytesToSkip > 0 )
    {
        updateBuffer();
        const sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
        // nothing left in the buffer after the update: end of data reached
        if( nSkipSize <= 0 )
            break;
        mnBufferPos += nSkipSize;
        nBytesToSkip -= nSkipSize;
    }
}
|
|
|
|
/** Returns the number of processed bytes left in the internal buffer, after
    giving updateBuffer() the chance to refill it. */
sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException)
{
    updateBuffer();
    const sal_Int32 nRemaining = maBuffer.getLength() - mnBufferPos;
    return nRemaining;
}
|
|
|
|
/** Closes the wrapped text input stream. */
void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException)
{
    mxTextStrm->closeInput();
}
|
|
|
|
// private --------------------------------------------------------------------
|
|
|
|
void InputStream::updateBuffer() throw (IOException, RuntimeException)
|
|
{
|
|
while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() )
|
|
{
|
|
// collect new contents in a string buffer
|
|
OStringBuffer aBuffer;
|
|
|
|
// read and process characters until the opening bracket of the next XML element
|
|
OString aChars = readToElementBegin();
|
|
bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars );
|
|
|
|
// read and process characters until (and including) closing bracket (an XML element)
|
|
OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
|
|
if( bHasOpeningBracket && !mxTextStrm->isEOF() )
|
|
{
|
|
// read the element text (add the leading opening bracket manually)
|
|
OString aElement = OString( '<' ) + readToElementEnd();
|
|
// check for CDATA part, starting with '<![CDATA['
|
|
if( aElement.match( maOpeningCData ) )
|
|
{
|
|
// search the end tag ']]>'
|
|
while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.match( maClosingCData, aElement.getLength() - maClosingCData.getLength() )) && !mxTextStrm->isEOF() )
|
|
aElement += readToElementEnd();
|
|
// copy the entire CDATA part
|
|
aBuffer.append( aElement );
|
|
}
|
|
else
|
|
{
|
|
// no CDATA part - process the contents of the element
|
|
lclProcessElement( aBuffer, aElement );
|
|
}
|
|
}
|
|
|
|
maBuffer = aBuffer.makeStringAndClear();
|
|
mnBufferPos = 0;
|
|
}
|
|
}
|
|
|
|
/** Reads from the text stream using the opening bracket '<' as delimiter and
    returns the text converted back to ISO-8859-1 bytes. */
OString InputStream::readToElementBegin() throw (IOException, RuntimeException)
{
    OString aText = OUStringToOString(
        mxTextStrm->readString( maOpeningBracket, sal_False ),
        RTL_TEXTENCODING_ISO_8859_1 );
    return aText;
}
|
|
|
|
/** Reads from the text stream using the closing bracket '>' as delimiter and
    returns the text converted back to ISO-8859-1 bytes.

    In debug builds, asserts that the returned text is not empty and ends
    with the closing bracket.
 */
OString InputStream::readToElementEnd() throw (IOException, RuntimeException)
{
    OString aElementText = OUStringToOString(
        mxTextStrm->readString( maClosingBracket, sal_False ),
        RTL_TEXTENCODING_ISO_8859_1 );
    OSL_ENSURE( !aElementText.isEmpty() && (aElementText[ aElementText.getLength() - 1 ] == '>'), "InputStream::readToElementEnd - missing closing bracket of XML element" );
    return aElementText;
}
|
|
|
|
// ============================================================================
|
|
|
|
} // namespace vml
|
|
} // namespace oox
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|