5c99f2c852
Change-Id: I8d02a51a0da5aad4cd95e15fe6bb329b43e32067
1596 lines
59 KiB
C++
1596 lines
59 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#include <cstddef>
|
|
#include <limits>
|
|
#include <memory>
|
|
|
|
#include <osl/diagnose.h>
|
|
#include <rtl/ustring.hxx>
|
|
#include <rtl/strbuf.hxx>
|
|
#include <rtl/tencinfo.h>
|
|
#include <tools/inetmime.hxx>
|
|
#include <rtl/character.hxx>
|
|
|
|
namespace {
|
|
|
|
/** Check for US-ASCII white space character.
|
|
|
|
@param nChar Some UCS-4 character.
|
|
|
|
@return True if nChar is a US-ASCII white space character (US-ASCII
|
|
0x09 or 0x20).
|
|
*/
|
|
inline bool isWhiteSpace(sal_uInt32 nChar);
|
|
|
|
/** Check whether some character is valid within an RFC 2045 <token>.
|
|
|
|
@param nChar Some UCS-4 character.
|
|
|
|
@return True if nChar is valid within an RFC 2047 <token> (US-ASCII
|
|
'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
|
|
'-', '.', '^', '_', '`', '{', '|', '}', or '~').
|
|
*/
|
|
bool isTokenChar(sal_uInt32 nChar);
|
|
|
|
/** Get the Base 64 digit weight of a US-ASCII character.
|
|
|
|
@param nChar Some UCS-4 character.
|
|
|
|
@return If nChar is a US-ASCII Base 64 digit character (US-ASCII
|
|
'A'--'F', or 'a'--'f', '0'--'9', '+', or '/'), return the
|
|
corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
|
|
character (US-ASCII '='), return -1; otherwise, return -2.
|
|
*/
|
|
inline int getBase64Weight(sal_uInt32 nChar);
|
|
|
|
inline bool startsWithLineFolding(const sal_Unicode * pBegin,
|
|
const sal_Unicode * pEnd);
|
|
|
|
const sal_Unicode * skipComment(const sal_Unicode * pBegin,
|
|
const sal_Unicode * pEnd);
|
|
|
|
const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
|
|
pBegin,
|
|
const sal_Unicode *
|
|
pEnd);
|
|
|
|
const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
|
|
const sal_Unicode * pEnd);
|
|
|
|
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
|
|
sal_Unicode const * pEnd,
|
|
INetContentTypeParameterList *
|
|
pParameters);
|
|
|
|
inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
|
|
eEncoding);
|
|
|
|
rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
|
|
const sal_Char * pEnd);
|
|
|
|
inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);
|
|
|
|
sal_Unicode * convertToUnicode(const sal_Char * pBegin,
|
|
const sal_Char * pEnd,
|
|
rtl_TextEncoding eEncoding,
|
|
sal_Size & rSize);
|
|
|
|
sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
|
|
const sal_Unicode * pEnd,
|
|
rtl_TextEncoding eEncoding,
|
|
sal_Size & rSize);
|
|
|
|
void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);
|
|
|
|
bool translateUTF8Char(const sal_Char *& rBegin,
|
|
const sal_Char * pEnd,
|
|
rtl_TextEncoding eEncoding,
|
|
sal_uInt32 & rCharacter);
|
|
|
|
/** Put the UTF-16 encoding of a UTF-32 character into a buffer.
|
|
|
|
@param pBuffer Points to a buffer, must not be null.
|
|
|
|
@param nUTF32 An UTF-32 character, must be in the range 0..0x10FFFF.
|
|
|
|
@return A pointer past the UTF-16 characters put into the buffer
|
|
(i.e., pBuffer + 1 or pBuffer + 2).
|
|
*/
|
|
inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
|
|
sal_uInt32 nUTF32);
|
|
|
|
// True only for US-ASCII horizontal tab (0x09) and space (0x20).
inline bool isWhiteSpace(sal_uInt32 nChar)
{
    switch (nChar)
    {
        case '\t':
        case ' ':
            return true;

        default:
            return false;
    }
}
|
|
|
|
// Map a Base 64 alphabet character to its weight: 'A'--'Z' -> 0--25,
// 'a'--'z' -> 26--51, '0'--'9' -> 52--61, '+' -> 62, '/' -> 63.  The padding
// character '=' yields -1 and every other character yields -2.
inline int getBase64Weight(sal_uInt32 nChar)
{
    if (rtl::isAsciiUpperCase(nChar))
        return int(nChar - 'A');
    if (rtl::isAsciiLowerCase(nChar))
        return int(nChar - 'a' + 26);
    if (rtl::isAsciiDigit(nChar))
        return int(nChar - '0' + 52);

    switch (nChar)
    {
        case '+':
            return 62;
        case '/':
            return 63;
        case '=':
            return -1;
        default:
            return -2;
    }
}
|
|
|
|
inline bool startsWithLineFolding(const sal_Unicode * pBegin,
|
|
const sal_Unicode * pEnd)
|
|
{
|
|
DBG_ASSERT(pBegin && pBegin <= pEnd,
|
|
"startsWithLineFolding(): Bad sequence");
|
|
|
|
return pEnd - pBegin >= 3 && pBegin[0] == 0x0D && pBegin[1] == 0x0A
|
|
&& isWhiteSpace(pBegin[2]); // CR, LF
|
|
}
|
|
|
|
// On Windows builds ISO-8859-1 is replaced by MS-1252; every other encoding
// (and every encoding on other platforms) is returned unchanged.
inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding eEncoding)
{
#if defined(_WIN32)
    if (eEncoding == RTL_TEXTENCODING_ISO_8859_1)
        return RTL_TEXTENCODING_MS_1252;
#endif
    return eEncoding;
}
|
|
|
|
// An encoding is acceptable here exactly when rtl_isOctetTextEncoding()
// accepts it.
inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
{
    const bool bOctetEncoding = rtl_isOctetTextEncoding(eEncoding);
    return bOctetEncoding;
}
|
|
|
|
// Convert the bytes in [pBegin, pEnd) from eEncoding to UTF-16.
//
// Returns a buffer allocated with new[] (the caller must delete[] it) and
// stores the number of UTF-16 code units written in rSize.  Returns nullptr
// if eEncoding is RTL_TEXTENCODING_DONTKNOW or if the converter reports any
// info flag for the final attempt.
sal_Unicode * convertToUnicode(const sal_Char * pBegin,
                               const sal_Char * pEnd,
                               rtl_TextEncoding eEncoding,
                               sal_Size & rSize)
{
    if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
        return nullptr;

    rtl_TextToUnicodeConverter hConverter
        = rtl_createTextToUnicodeConverter(eEncoding);
    rtl_TextToUnicodeContext hContext
        = rtl_createTextToUnicodeContext(hConverter);

    sal_Unicode * pResult;
    sal_uInt32 nInfo;
    sal_Size nCapacity = pEnd - pBegin;
    for (;;)
    {
        pResult = new sal_Unicode[nCapacity];
        sal_Size nSrcCvtBytes;
        rSize = rtl_convertTextToUnicode(
            hConverter, hContext, pBegin, pEnd - pBegin, pResult, nCapacity,
            RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
                | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
                | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR,
            &nInfo, &nSrcCvtBytes);
        if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
            break;

        // Destination too small: throw the attempt away and retry with a
        // buffer that is roughly one third larger.
        delete[] pResult;
        rtl_resetTextToUnicodeContext(hConverter, hContext);
        nCapacity += nCapacity / 3 + 1;
    }

    rtl_destroyTextToUnicodeContext(hConverter, hContext);
    rtl_destroyTextToUnicodeConverter(hConverter);

    if (nInfo == 0)
        return pResult;

    delete[] pResult;
    return nullptr;
}
|
|
|
|
// Convert the UTF-16 code units in [pBegin, pEnd) to eEncoding.
//
// Returns a buffer allocated with new[] (the caller must delete[] it) and
// stores the number of bytes written in rSize.  Returns nullptr if eEncoding
// is RTL_TEXTENCODING_DONTKNOW or if the converter reports any info flag for
// the final attempt.
sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
                              const sal_Unicode * pEnd,
                              rtl_TextEncoding eEncoding,
                              sal_Size & rSize)
{
    if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
        return nullptr;

    rtl_UnicodeToTextConverter hConverter
        = rtl_createUnicodeToTextConverter(eEncoding);
    rtl_UnicodeToTextContext hContext
        = rtl_createUnicodeToTextContext(hConverter);

    sal_Char * pResult;
    sal_uInt32 nInfo;
    sal_Size nCapacity = pEnd - pBegin;
    for (;;)
    {
        pResult = new sal_Char[nCapacity];
        sal_Size nSrcCvtBytes;
        rSize = rtl_convertUnicodeToText(
            hConverter, hContext, pBegin, pEnd - pBegin, pResult, nCapacity,
            RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
                | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
                | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
                | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR,
            &nInfo, &nSrcCvtBytes);
        if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
            break;

        // Destination too small: throw the attempt away and retry with a
        // buffer that is roughly one third larger.
        delete[] pResult;
        rtl_resetUnicodeToTextContext(hConverter, hContext);
        nCapacity += nCapacity / 3 + 1;
    }

    rtl_destroyUnicodeToTextContext(hConverter, hContext);
    rtl_destroyUnicodeToTextConverter(hConverter);

    if (nInfo == 0)
        return pResult;

    delete[] pResult;
    return nullptr;
}
|
|
|
|
// Write nUTF32 as one UTF-16 code unit, or as a high/low surrogate pair for
// values of 0x10000 and above, and return the position past what was
// written.
inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
                                       sal_uInt32 nUTF32)
{
    DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");

    if (nUTF32 >= 0x10000)
    {
        const sal_uInt32 nOffset = nUTF32 - 0x10000;
        pBuffer[0] = sal_Unicode(0xD800 | (nOffset >> 10));
        pBuffer[1] = sal_Unicode(0xDC00 | (nOffset & 0x3FF));
        return pBuffer + 2;
    }

    pBuffer[0] = sal_Unicode(nUTF32);
    return pBuffer + 1;
}
|
|
|
|
// Append the UTF-8 encoding of nChar to rSink, using the one- to six-byte
// forms described in RFC 2279 (values below 0x80000000).
void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar)
{
    // See RFC 2279 for a discussion of UTF-8.
    DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");

    if (nChar < 0x80)
    {
        rSink << sal_Char(nChar);
        return;
    }

    // Number of continuation bytes and the marker bits of the lead byte.
    int nTrailing;
    sal_uInt32 nLeadMark;
    if (nChar < 0x800)
    {
        nTrailing = 1;
        nLeadMark = 0xC0;
    }
    else if (nChar < 0x10000)
    {
        nTrailing = 2;
        nLeadMark = 0xE0;
    }
    else if (nChar < 0x200000)
    {
        nTrailing = 3;
        nLeadMark = 0xF0;
    }
    else if (nChar < 0x4000000)
    {
        nTrailing = 4;
        nLeadMark = 0xF8;
    }
    else
    {
        nTrailing = 5;
        nLeadMark = 0xFC;
    }

    rSink << sal_Char(nChar >> (6 * nTrailing) | nLeadMark);
    for (int nShift = 6 * (nTrailing - 1); nShift >= 0; nShift -= 6)
        rSink << sal_Char((nChar >> nShift & 0x3F) | 0x80);
}
|
|
|
|
bool translateUTF8Char(const sal_Char *& rBegin,
|
|
const sal_Char * pEnd,
|
|
rtl_TextEncoding eEncoding,
|
|
sal_uInt32 & rCharacter)
|
|
{
|
|
if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
|
|
|| static_cast< unsigned char >(*rBegin) >= 0xFE)
|
|
return false;
|
|
|
|
int nCount;
|
|
sal_uInt32 nMin;
|
|
sal_uInt32 nUCS4;
|
|
const sal_Char * p = rBegin;
|
|
if (static_cast< unsigned char >(*p) < 0xE0)
|
|
{
|
|
nCount = 1;
|
|
nMin = 0x80;
|
|
nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
|
|
}
|
|
else if (static_cast< unsigned char >(*p) < 0xF0)
|
|
{
|
|
nCount = 2;
|
|
nMin = 0x800;
|
|
nUCS4 = static_cast< unsigned char >(*p) & 0xF;
|
|
}
|
|
else if (static_cast< unsigned char >(*p) < 0xF8)
|
|
{
|
|
nCount = 3;
|
|
nMin = 0x10000;
|
|
nUCS4 = static_cast< unsigned char >(*p) & 7;
|
|
}
|
|
else if (static_cast< unsigned char >(*p) < 0xFC)
|
|
{
|
|
nCount = 4;
|
|
nMin = 0x200000;
|
|
nUCS4 = static_cast< unsigned char >(*p) & 3;
|
|
}
|
|
else
|
|
{
|
|
nCount = 5;
|
|
nMin = 0x4000000;
|
|
nUCS4 = static_cast< unsigned char >(*p) & 1;
|
|
}
|
|
++p;
|
|
|
|
for (; nCount-- > 0; ++p)
|
|
if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
|
|
nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
|
|
else
|
|
return false;
|
|
|
|
if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
|
|
return false;
|
|
|
|
if (eEncoding >= RTL_TEXTENCODING_UCS4)
|
|
rCharacter = nUCS4;
|
|
else
|
|
{
|
|
sal_Unicode aUTF16[2];
|
|
const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
|
|
sal_Size nSize;
|
|
sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
|
|
nSize);
|
|
if (!pBuffer)
|
|
return false;
|
|
DBG_ASSERT(nSize == 1,
|
|
"translateUTF8Char(): Bad conversion");
|
|
rCharacter = *pBuffer;
|
|
delete[] pBuffer;
|
|
}
|
|
rBegin = p;
|
|
return true;
|
|
}
|
|
|
|
// Append the bytes in [pBegin, pEnd) to rText, each byte taken as the
// character with the same value.
void appendISO88591(OUString & rText,
                    sal_Char const * pBegin,
                    sal_Char const * pEnd);
|
|
|
|
struct Parameter
|
|
{
|
|
Parameter * m_pNext;
|
|
OString m_aAttribute;
|
|
OString m_aCharset;
|
|
OString m_aValue;
|
|
sal_uInt32 m_nSection;
|
|
bool m_bExtended;
|
|
|
|
inline Parameter(Parameter * pTheNext, const OString& rTheAttribute,
|
|
const OString& rTheCharset,
|
|
const OString& rTheValue, sal_uInt32 nTheSection,
|
|
bool bTheExtended);
|
|
};
|
|
|
|
inline Parameter::Parameter(Parameter * pTheNext,
|
|
const OString& rTheAttribute,
|
|
const OString& rTheCharset,
|
|
const OString& rTheValue,
|
|
sal_uInt32 nTheSection, bool bTheExtended):
|
|
m_pNext(pTheNext),
|
|
m_aAttribute(rTheAttribute),
|
|
m_aCharset(rTheCharset),
|
|
m_aValue(rTheValue),
|
|
m_nSection(nTheSection),
|
|
m_bExtended(bTheExtended)
|
|
{}
|
|
|
|
struct ParameterList
|
|
{
|
|
Parameter * m_pList;
|
|
|
|
ParameterList(): m_pList(nullptr) {}
|
|
|
|
inline ~ParameterList();
|
|
|
|
Parameter ** find(const OString& rAttribute, sal_uInt32 nSection,
|
|
bool & rPresent);
|
|
};
|
|
|
|
inline ParameterList::~ParameterList()
|
|
{
|
|
while (m_pList)
|
|
{
|
|
Parameter * pNext = m_pList->m_pNext;
|
|
delete m_pList;
|
|
m_pList = pNext;
|
|
}
|
|
}
|
|
|
|
// Validate rInput and, if pOutput is non-null, fill it with the decoded
// parameters.
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput);
|
|
|
|
// appendISO88591
|
|
|
|
// Widen every byte of [pBegin, pEnd) to the UTF-16 code unit with the same
// value (via unsigned char, so bytes >= 0x80 map to U+0080..U+00FF) and
// append the result to rText.
void appendISO88591(OUString & rText, sal_Char const * pBegin,
                    sal_Char const * pEnd)
{
    sal_Int32 const nCount = pEnd - pBegin;
    std::unique_ptr<sal_Unicode[]> pWide(new sal_Unicode[nCount]);
    for (sal_Int32 i = 0; i != nCount; ++i)
        pWide[i] = static_cast<unsigned char>(pBegin[i]);
    rText += OUString(pWide.get(), nCount);
}
|
|
|
|
// ParameterList
|
|
|
|
/** Locate a parameter, or the place where it has to be inserted.

    The list is kept sorted in ascending order, first by attribute and then
    by section number.  parseParameters() depends on the ascending section
    order: it requires section N of an attribute to be directly preceded by
    section N - 1 of the same attribute and concatenates the section values
    in list order.

    @param rAttribute  The attribute name to look for.

    @param nSection  The section number to look for.

    @param rPresent  Set to true if a node with this attribute and section
    already exists, to false otherwise.

    @return  The address of the link (either m_pList or some node's m_pNext)
    that points to the matching node if rPresent is true; otherwise the
    address of the link a new node has to be stored in (with the link's old
    value as the new node's successor) to keep the list sorted.
 */
Parameter ** ParameterList::find(const OString& rAttribute,
                                 sal_uInt32 nSection, bool & rPresent)
{
    Parameter ** p = &m_pList;
    for (; *p; p = &(*p)->m_pNext)
    {
        sal_Int32 nCompare = rAttribute.compareTo((*p)->m_aAttribute);
        if (nCompare < 0)
            break; // first node with a larger attribute: insert before it
        else if (nCompare == 0)
        {
            if (nSection < (*p)->m_nSection)
                break; // same attribute, larger section: insert before it
            else if (nSection == (*p)->m_nSection)
            {
                rPresent = true;
                return p;
            }
        }
    }
    rPresent = false;
    return p;
}
|
|
|
|
// parseParameters
|
|
|
|
/** Validate a scanned parameter list and decode it into pOutput.

    @param rInput  The scanned parameters.  Every node with a section number
    N > 0 must be directly preceded by the node for section N - 1 of the same
    attribute.

    @param pOutput  If non-null, it is cleared and then receives one entry
    per attribute, whose value is the concatenation of the decoded values of
    all sections of that attribute.  If null, only the validation is done.

    @return  False if the section numbering is not contiguous as described
    above, true otherwise.
 */
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput)
{
    if (pOutput)
        pOutput->clear();

    // Pass 1: check that continuation sections are contiguous.
    Parameter * pPrev = nullptr;
    for (Parameter * p = rInput.m_pList; p; p = p->m_pNext)
    {
        if (p->m_nSection > 0
            && (!pPrev
                || pPrev->m_nSection != p->m_nSection - 1
                || pPrev->m_aAttribute != p->m_aAttribute))
            return false;
        pPrev = p;
    }

    // Pass 2: decode.  p always points at the first section of an attribute.
    if (pOutput)
        for (Parameter * p = rInput.m_pList; p;)
        {
            bool bCharset = !p->m_aCharset.isEmpty();
            rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bCharset)
                // The end pointer must be derived from this parameter's own
                // charset string, not from the first node of the list.
                eEncoding
                    = getCharsetEncoding(p->m_aCharset.getStr(),
                                         p->m_aCharset.getStr()
                                             + p->m_aCharset.getLength());
            OUString aValue;
            bool bBadEncoding = false;
            Parameter * pNext = p;
            do
            {
                // With a charset on an extended parameter, use exactly that
                // encoding; otherwise try UTF-8 first and fall back to
                // ISO-8859-1.
                sal_Size nSize;
                sal_Unicode * pUnicode
                    = convertToUnicode(pNext->m_aValue.getStr(),
                                       pNext->m_aValue.getStr()
                                           + pNext->m_aValue.getLength(),
                                       bCharset && p->m_bExtended ?
                                           eEncoding :
                                           RTL_TEXTENCODING_UTF8,
                                       nSize);
                if (!pUnicode && !(bCharset && p->m_bExtended))
                    pUnicode = convertToUnicode(
                                   pNext->m_aValue.getStr(),
                                   pNext->m_aValue.getStr()
                                       + pNext->m_aValue.getLength(),
                                   RTL_TEXTENCODING_ISO_8859_1, nSize);
                if (!pUnicode)
                {
                    bBadEncoding = true;
                    break;
                }
                aValue += OUString(pUnicode, static_cast<sal_Int32>(nSize));
                delete[] pUnicode;
                pNext = pNext->m_pNext;
            }
            while (pNext && pNext->m_nSection > 0);
            if (bBadEncoding)
            {
                // Some section could not be decoded: rebuild the whole value
                // byte by byte.  Bytes of extended sections are mapped to
                // 0xF800 | byte, bytes of other sections to the character
                // with the byte's value.
                aValue.clear();
                for (pNext = p;;)
                {
                    if (pNext->m_bExtended)
                    {
                        for (sal_Int32 i = 0; i < pNext->m_aValue.getLength(); ++i)
                            aValue += OUStringLiteral1(
                                sal_Unicode(
                                    static_cast<unsigned char>(pNext->m_aValue[i]))
                                | 0xF800);
                    }
                    else
                    {
                        for (sal_Int32 i = 0; i < pNext->m_aValue.getLength(); ++i)
                            aValue += OUStringLiteral1( static_cast<unsigned char>(pNext->m_aValue[i]) );
                    }
                    pNext = pNext->m_pNext;
                    if (!pNext || pNext->m_nSection == 0)
                        break;
                };
            }
            INetContentTypeParameter x {aValue}; // workaround ICE in VisualStudio2013
            auto const ret = pOutput->insert({p->m_aAttribute, x });
            SAL_INFO_IF(!ret.second, "tools",
                "INetMIME: dropping duplicate parameter: " << p->m_aAttribute);
            p = pNext;
        }
    return true;
}
|
|
|
|
// True for the US-ASCII characters allowed here: letters, digits and
// '!', '#', '$', '%', '&', ''', '*', '+', '-', '.', '^', '_', '`', '{', '|',
// '}', '~'.  Everything else, including all non-ASCII characters, is
// rejected.
bool isTokenChar(sal_uInt32 nChar)
{
    if (!rtl::isAscii(nChar))
        return false;
    if (rtl::isAsciiAlpha(nChar) || rtl::isAsciiDigit(nChar))
        return true;

    switch (nChar)
    {
        case '!':
        case '#':
        case '$':
        case '%':
        case '&':
        case '\'':
        case '*':
        case '+':
        case '-':
        case '.':
        case '^':
        case '_':
        case '`':
        case '{':
        case '|':
        case '}':
        case '~':
            return true;

        default:
            return false;
    }
}
|
|
|
|
// Skip one parenthesized comment starting at pBegin.  Comments nest, and a
// backslash makes the following character literal.  Returns the position
// past the matching ')' or pBegin if there is no complete comment at pBegin.
const sal_Unicode * skipComment(const sal_Unicode * pBegin,
                                const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipComment(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '(')
        return pBegin;

    sal_uInt32 nDepth = 0;
    const sal_Unicode * pCur = pBegin;
    while (pCur != pEnd)
    {
        const sal_Unicode cChar = *pCur++;
        if (cChar == '(')
            ++nDepth;
        else if (cChar == ')')
        {
            if (--nDepth == 0)
                return pCur;
        }
        else if (cChar == '\\' && pCur != pEnd)
            ++pCur; // skip the escaped character
    }

    // Ran out of input before the comment was closed.
    return pBegin;
}
|
|
|
|
// Skip any mixture of tabs, spaces, line foldings (CR LF followed by white
// space) and comments.  Returns the first position that is none of these;
// a CR that does not start a line folding and a '(' that does not start a
// complete comment both stop the scan.
const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * pBegin,
                                                const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipLinearWhiteSpaceComment(): Bad sequence");

    const sal_Unicode * pCur = pBegin;
    while (pCur != pEnd)
    {
        const sal_Unicode cChar = *pCur;
        if (cChar == '\t' || cChar == ' ')
            ++pCur;
        else if (cChar == 0x0D) // CR
        {
            if (!startsWithLineFolding(pCur, pEnd))
                return pCur;
            pCur += 3;
        }
        else if (cChar == '(')
        {
            const sal_Unicode * pAfterComment = skipComment(pCur, pEnd);
            if (pAfterComment == pCur)
                return pCur;
            pCur = pAfterComment;
        }
        else
            return pCur;
    }
    return pCur;
}
|
|
|
|
// Skip one double-quoted string starting at pBegin.  A backslash makes the
// following character literal; a CR is only accepted as the start of
// CR LF followed by white space.  Returns the position past the closing
// quote, or pBegin if there is no complete, well-formed quoted string.
const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
                                     const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipQuotedString(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '"')
        return pBegin;

    const sal_Unicode * pCur = pBegin + 1;
    while (pCur != pEnd)
    {
        const sal_Unicode cChar = *pCur++;
        if (cChar == '"')
            return pCur;
        if (cChar == 0x0D) // CR
        {
            // Must be followed by LF and a white space character.
            if (pEnd - pCur < 2 || pCur[0] != 0x0A
                || !isWhiteSpace(pCur[1]))
                return pBegin;
            pCur += 2;
        }
        else if (cChar == '\\' && pCur != pEnd)
            ++pCur; // skip the escaped character
    }

    // Ran out of input before the closing quote.
    return pBegin;
}
|
|
|
|
/** Scan a sequence of ";attribute=value" parameters.

    Each parameter is "attribute", optionally followed by "*" and a section
    number, optionally followed by "*" (extended form), then "=" and a value.
    The value is a token, a quoted string or, for the extended form, an
    optional "charset'language'" prefix (first section only) followed by
    percent-encoded text.  Scanning stops at the first item that does not fit.

    @param pBegin  Start of the text to scan.

    @param pEnd  End of the text to scan.

    @param pParameters  If non-null, receives the decoded parameters (see
    parseParameters()); if null, the text is only scanned.

    @return  If parseParameters() accepts what was collected, the position
    where scanning for the next parameter stopped; otherwise pBegin.
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                                   sal_Unicode const * pEnd,
                                   INetContentTypeParameterList * pParameters)
{
    ParameterList aList;
    sal_Unicode const * pParameterBegin = pBegin;
    for (sal_Unicode const * p = pParameterBegin;;)
    {
        // Every parameter is introduced by ';'.
        pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
        if (pParameterBegin == pEnd || *pParameterBegin != ';')
            break;
        p = pParameterBegin + 1;

        // Attribute name: token characters up to (not including) '*'.
        sal_Unicode const * pAttributeBegin
            = skipLinearWhiteSpaceComment(p, pEnd);
        p = pAttributeBegin;
        bool bAttributeHasUpper = false;
        while (p != pEnd && isTokenChar(*p) && *p != '*')
        {
            bAttributeHasUpper = bAttributeHasUpper || rtl::isAsciiUpperCase(*p);
            ++p;
        }
        if (p == pAttributeBegin)
            break;
        OString aAttribute = OString(
            pAttributeBegin, p - pAttributeBegin,
            RTL_TEXTENCODING_ASCII_US);
        if (bAttributeHasUpper)
            aAttribute = aAttribute.toAsciiLowerCase();

        // Optional "*<section>".
        // NOTE(review): for "attr*=..." the single '*' is consumed here and,
        // as no digit follows, the parameter ends up with section 0 and
        // bExtended == false.  RFC 2231 appears to define that form as
        // extended -- confirm whether this is intended.
        sal_uInt32 nSection = 0;
        if (p != pEnd && *p == '*')
        {
            ++p;
            if (p != pEnd && rtl::isAsciiDigit(*p)
                && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
                break;
        }

        // The same attribute/section pair must not occur twice.
        bool bPresent;
        Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
        if (bPresent)
            break;

        // Optional '*' marking the extended form.
        bool bExtended = false;
        if (p != pEnd && *p == '*')
        {
            ++p;
            bExtended = true;
        }

        p = skipLinearWhiteSpaceComment(p, pEnd);

        if (p == pEnd || *p != '=')
            break;

        p = skipLinearWhiteSpaceComment(p + 1, pEnd);

        OString aCharset;
        OString aLanguage; // scanned and validated, but not passed on below
        OString aValue;
        if (bExtended)
        {
            if (nSection == 0)
            {
                // charset, terminated by '\''
                sal_Unicode const * pCharsetBegin = p;
                bool bCharsetHasUpper = false;
                while (p != pEnd && isTokenChar(*p) && *p != '\'')
                {
                    bCharsetHasUpper = bCharsetHasUpper || rtl::isAsciiUpperCase(*p);
                    ++p;
                }
                if (p == pCharsetBegin)
                    break;
                if (pParameters)
                {
                    aCharset = OString(
                        pCharsetBegin,
                        p - pCharsetBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bCharsetHasUpper)
                        aCharset = aCharset.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;

                // language: '-'-separated groups of 1 to 8 letters,
                // terminated by '\''
                sal_Unicode const * pLanguageBegin = p;
                bool bLanguageHasUpper = false;
                int nLetters = 0;
                for (; p != pEnd; ++p)
                {
                    if (rtl::isAsciiAlpha(*p))
                    {
                        if (++nLetters > 8)
                            break;
                        bLanguageHasUpper = bLanguageHasUpper
                                            || rtl::isAsciiUpperCase(*p);
                    }
                    else if (*p == '-')
                    {
                        if (nLetters == 0)
                            break;
                        nLetters = 0;
                    }
                    else
                        break;
                }
                if (nLetters == 0 || nLetters > 8)
                    break;
                if (pParameters)
                {
                    aLanguage = OString(
                        pLanguageBegin,
                        p - pLanguageBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bLanguageHasUpper)
                        aLanguage = aLanguage.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;
            }
            if (pParameters)
            {
                // Collect the value: "%XX" becomes the byte XX, everything
                // else is written as UTF-8.
                // NOTE(review): if INetMIME::getUTF32Character() advances p
                // (it is handed p itself), the terminating character is
                // consumed here but not in the scan-only branch below --
                // confirm.
                INetMIMEOutputSink aSink;
                while (p != pEnd)
                {
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (rtl::isAscii(nChar) && !isTokenChar(nChar))
                        break;
                    if (nChar == '%' && p + 1 < pEnd)
                    {
                        int nWeight1 = INetMIME::getHexWeight(p[0]);
                        int nWeight2 = INetMIME::getHexWeight(p[1]);
                        if (nWeight1 >= 0 && nWeight2 >= 0)
                        {
                            aSink << sal_Char(nWeight1 << 4 | nWeight2);
                            p += 2;
                            continue;
                        }
                    }
                    writeUTF8(aSink, nChar);
                }
                aValue = aSink.takeBuffer();
            }
            else
            {
                while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                    ++p;
            }
        }
        else if (p != pEnd && *p == '"')
        {
            if (pParameters)
            {
                // Quoted string: unescape and unfold into UTF-8.
                INetMIMEOutputSink aSink;
                bool bInvalid = false;
                for (++p;;)
                {
                    if (p == pEnd)
                    {
                        bInvalid = true;
                        break;
                    }
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (nChar == '"')
                        break;
                    else if (nChar == 0x0D) // CR
                    {
                        if (pEnd - p < 2 || *p++ != 0x0A // LF
                            || !isWhiteSpace(*p))
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = static_cast<unsigned char>(*p++);
                    }
                    else if (nChar == '\\')
                    {
                        if (p == pEnd)
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = INetMIME::getUTF32Character(p, pEnd);
                    }
                    writeUTF8(aSink, nChar);
                }
                if (bInvalid)
                    break;
                aValue = aSink.takeBuffer();
            }
            else
            {
                sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
                if (p == pStringEnd)
                    break;
                p = pStringEnd;
            }
        }
        else
        {
            // Plain token; non-ASCII characters are accepted as well.
            sal_Unicode const * pTokenBegin = p;
            while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                ++p;
            if (p == pTokenBegin)
                break;
            if (pParameters)
                aValue = OString(
                    pTokenBegin, p - pTokenBegin,
                    RTL_TEXTENCODING_UTF8);
        }

        // Link the new node in at the position found above.
        *pPos = new Parameter(*pPos, aAttribute, aCharset, aValue,
                              nSection, bExtended);
    }
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
}
|
|
|
|
bool equalIgnoreCase(const sal_Char * pBegin1,
|
|
const sal_Char * pEnd1,
|
|
const sal_Char * pString2)
|
|
{
|
|
DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
|
|
"equalIgnoreCase(): Bad sequences");
|
|
|
|
while (*pString2 != 0)
|
|
if (pBegin1 == pEnd1
|
|
|| rtl::toAsciiUpperCase(*pBegin1++) != rtl::toAsciiUpperCase(*pString2++))
|
|
return false;
|
|
return pBegin1 == pEnd1;
|
|
}
|
|
|
|
struct EncodingEntry
|
|
{
|
|
sal_Char const * m_aName;
|
|
rtl_TextEncoding m_eEncoding;
|
|
};
|
|
|
|
// The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
|
|
// assignments/character-sets> as of Jan, 21 2000 12:46:00, unless otherwise
|
|
// noted:
|
|
static EncodingEntry const aEncodingMap[]
|
|
= { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ASCII", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ISO646-US", RTL_TEXTENCODING_ASCII_US },
|
|
{ "US", RTL_TEXTENCODING_ASCII_US },
|
|
{ "IBM367", RTL_TEXTENCODING_ASCII_US },
|
|
{ "CP367", RTL_TEXTENCODING_ASCII_US },
|
|
{ "CSASCII", RTL_TEXTENCODING_ASCII_US },
|
|
{ "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "L1", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "CP819", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
|
|
{ "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "L2", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
|
|
{ "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "L3", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
|
|
{ "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "L4", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
|
|
{ "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
|
|
{ "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
|
|
{ "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
|
|
{ "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
|
|
{ "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
|
|
{ "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
|
|
{ "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
|
|
{ "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
|
|
{ "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
|
|
{ "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
|
|
{ "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
|
|
{ "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
|
|
{ "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
|
|
{ "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
|
|
{ "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "L5", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
|
|
{ "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
|
|
{ "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
|
|
{ "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
|
|
{ "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
|
|
{ "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
|
|
{ "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
|
|
{ "IBM437", RTL_TEXTENCODING_IBM_437 },
|
|
{ "CP437", RTL_TEXTENCODING_IBM_437 },
|
|
{ "437", RTL_TEXTENCODING_IBM_437 },
|
|
{ "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
|
|
{ "IBM850", RTL_TEXTENCODING_IBM_850 },
|
|
{ "CP850", RTL_TEXTENCODING_IBM_850 },
|
|
{ "850", RTL_TEXTENCODING_IBM_850 },
|
|
{ "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
|
|
{ "IBM860", RTL_TEXTENCODING_IBM_860 },
|
|
{ "CP860", RTL_TEXTENCODING_IBM_860 },
|
|
{ "860", RTL_TEXTENCODING_IBM_860 },
|
|
{ "CSIBM860", RTL_TEXTENCODING_IBM_860 },
|
|
{ "IBM861", RTL_TEXTENCODING_IBM_861 },
|
|
{ "CP861", RTL_TEXTENCODING_IBM_861 },
|
|
{ "861", RTL_TEXTENCODING_IBM_861 },
|
|
{ "CP-IS", RTL_TEXTENCODING_IBM_861 },
|
|
{ "CSIBM861", RTL_TEXTENCODING_IBM_861 },
|
|
{ "IBM863", RTL_TEXTENCODING_IBM_863 },
|
|
{ "CP863", RTL_TEXTENCODING_IBM_863 },
|
|
{ "863", RTL_TEXTENCODING_IBM_863 },
|
|
{ "CSIBM863", RTL_TEXTENCODING_IBM_863 },
|
|
{ "IBM865", RTL_TEXTENCODING_IBM_865 },
|
|
{ "CP865", RTL_TEXTENCODING_IBM_865 },
|
|
{ "865", RTL_TEXTENCODING_IBM_865 },
|
|
{ "CSIBM865", RTL_TEXTENCODING_IBM_865 },
|
|
{ "IBM775", RTL_TEXTENCODING_IBM_775 },
|
|
{ "CP775", RTL_TEXTENCODING_IBM_775 },
|
|
{ "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
|
|
{ "IBM852", RTL_TEXTENCODING_IBM_852 },
|
|
{ "CP852", RTL_TEXTENCODING_IBM_852 },
|
|
{ "852", RTL_TEXTENCODING_IBM_852 },
|
|
{ "CSPCP852", RTL_TEXTENCODING_IBM_852 },
|
|
{ "IBM855", RTL_TEXTENCODING_IBM_855 },
|
|
{ "CP855", RTL_TEXTENCODING_IBM_855 },
|
|
{ "855", RTL_TEXTENCODING_IBM_855 },
|
|
{ "CSIBM855", RTL_TEXTENCODING_IBM_855 },
|
|
{ "IBM857", RTL_TEXTENCODING_IBM_857 },
|
|
{ "CP857", RTL_TEXTENCODING_IBM_857 },
|
|
{ "857", RTL_TEXTENCODING_IBM_857 },
|
|
{ "CSIBM857", RTL_TEXTENCODING_IBM_857 },
|
|
{ "IBM862", RTL_TEXTENCODING_IBM_862 },
|
|
{ "CP862", RTL_TEXTENCODING_IBM_862 },
|
|
{ "862", RTL_TEXTENCODING_IBM_862 },
|
|
{ "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
|
|
{ "IBM864", RTL_TEXTENCODING_IBM_864 },
|
|
{ "CP864", RTL_TEXTENCODING_IBM_864 },
|
|
{ "CSIBM864", RTL_TEXTENCODING_IBM_864 },
|
|
{ "IBM866", RTL_TEXTENCODING_IBM_866 },
|
|
{ "CP866", RTL_TEXTENCODING_IBM_866 },
|
|
{ "866", RTL_TEXTENCODING_IBM_866 },
|
|
{ "CSIBM866", RTL_TEXTENCODING_IBM_866 },
|
|
{ "IBM869", RTL_TEXTENCODING_IBM_869 },
|
|
{ "CP869", RTL_TEXTENCODING_IBM_869 },
|
|
{ "869", RTL_TEXTENCODING_IBM_869 },
|
|
{ "CP-GR", RTL_TEXTENCODING_IBM_869 },
|
|
{ "CSIBM869", RTL_TEXTENCODING_IBM_869 },
|
|
{ "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
|
|
{ "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
|
|
{ "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
|
|
{ "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
|
|
{ "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
|
|
{ "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
|
|
{ "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
|
|
{ "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
|
|
{ "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
|
|
{ "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
|
|
{ "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
|
|
{ "GB2312", RTL_TEXTENCODING_GB_2312 },
|
|
{ "CSGB2312", RTL_TEXTENCODING_GB_2312 },
|
|
{ "BIG5", RTL_TEXTENCODING_BIG5 },
|
|
{ "CSBIG5", RTL_TEXTENCODING_BIG5 },
|
|
{ "EUC-JP", RTL_TEXTENCODING_EUC_JP },
|
|
{ "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
|
|
RTL_TEXTENCODING_EUC_JP },
|
|
{ "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
|
|
{ "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
|
|
{ "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
|
|
{ "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
|
|
{ "KOI8-R", RTL_TEXTENCODING_KOI8_R },
|
|
{ "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
|
|
{ "UTF-7", RTL_TEXTENCODING_UTF7 },
|
|
{ "UTF-8", RTL_TEXTENCODING_UTF8 },
|
|
{ "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
|
|
{ "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
|
|
{ "EUC-KR", RTL_TEXTENCODING_EUC_KR },
|
|
{ "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
|
|
{ "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
|
|
{ "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
|
|
{ "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
|
|
{ "CSUCS4", RTL_TEXTENCODING_UCS4 },
|
|
{ "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
|
|
{ "CSUNICODE", RTL_TEXTENCODING_UCS2 } };
|
|
|
|
// Look the charset name [pBegin, pEnd) up in aEncodingMap, ignoring US-ASCII
// case.  Returns the encoding of the first matching entry, or
// RTL_TEXTENCODING_DONTKNOW if no entry matches.
rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
                                    sal_Char const * pEnd)
{
    EncodingEntry const * const pMapEnd
        = aEncodingMap + sizeof aEncodingMap / sizeof aEncodingMap[0];
    for (EncodingEntry const * pEntry = aEncodingMap; pEntry != pMapEnd;
         ++pEntry)
    {
        if (equalIgnoreCase(pBegin, pEnd, pEntry->m_aName))
            return pEntry->m_eEncoding;
    }
    return RTL_TEXTENCODING_DONTKNOW;
}
|
|
|
|
}
|
|
|
|
// INetMIME
|
|
|
|
// static
|
|
/** Check whether a character may appear within an atom.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is a US-ASCII character other than a control
    character (0x00--0x1F, 0x7F), space, or one of
    '"', '(', ')', ',', '.', ':', ';', '<', '>', '@', '[', '\', ']'.
 */
bool INetMIME::isAtomChar(sal_uInt32 nChar)
{
    // Anything outside US-ASCII, as well as controls, space and DEL, is
    // never part of an atom.
    if (!rtl::isAscii(nChar) || nChar <= 0x20 || nChar == 0x7F)
        return false;

    switch (nChar)
    {
        // The special characters that delimit atoms.
        case '"':
        case '(':
        case ')':
        case ',':
        case '.':
        case ':':
        case ';':
        case '<':
        case '>':
        case '@':
        case '[':
        case '\\':
        case ']':
            return false;

        default:
            return true;
    }
}
|
|
|
|
// static
|
|
/** Check whether a character may appear within an IMAP atom.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is a US-ASCII character other than a control
    character (0x00--0x1F, 0x7F), space, or one of
    '"', '%', '(', ')', '*', '\', '{'.
 */
bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
{
    // Anything outside US-ASCII, as well as controls, space and DEL, is
    // never part of an IMAP atom.
    if (!rtl::isAscii(nChar) || nChar <= 0x20 || nChar == 0x7F)
        return false;

    switch (nChar)
    {
        // Quoted specials, list wildcards, parentheses and the literal
        // introducer are excluded.
        case '"':
        case '%':
        case '(':
        case ')':
        case '*':
        case '\\':
        case '{':
            return false;

        default:
            return true;
    }
}
|
|
|
|
// static
|
|
bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
|
|
const sal_Unicode * pEnd1,
|
|
const sal_Char * pString2)
|
|
{
|
|
DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
|
|
"INetMIME::equalIgnoreCase(): Bad sequences");
|
|
|
|
while (*pString2 != 0)
|
|
if (pBegin1 == pEnd1
|
|
|| rtl::toAsciiUpperCase(*pBegin1++) != rtl::toAsciiUpperCase(*pString2++))
|
|
return false;
|
|
return pBegin1 == pEnd1;
|
|
}
|
|
|
|
// static
|
|
bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
|
|
const sal_Unicode * pEnd, bool bLeadingZeroes,
|
|
sal_uInt32 & rValue)
|
|
{
|
|
sal_uInt64 nTheValue = 0;
|
|
const sal_Unicode * p = rBegin;
|
|
for ( ; p != pEnd; ++p)
|
|
{
|
|
int nWeight = getWeight(*p);
|
|
if (nWeight < 0)
|
|
break;
|
|
nTheValue = 10 * nTheValue + nWeight;
|
|
if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
|
|
return false;
|
|
}
|
|
if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
|
|
return false;
|
|
rBegin = p;
|
|
rValue = sal_uInt32(nTheValue);
|
|
return true;
|
|
}
|
|
|
|
// static
|
|
/** Scan a content type of the form <type> "/" <subtype>, followed by
    whatever scanParameters() accepts.

    Linear white space and comments are skipped before the type, around the
    slash, as implemented by skipLinearWhiteSpaceComment().

    @param pBegin  Points to the first character to scan.

    @param pEnd  Points past the last character to scan.

    @param pType  If not null, receives the type, converted to US-ASCII
    lower case.

    @param pSubType  If not null, receives the subtype, converted to
    US-ASCII lower case.

    @param pParameters  Passed on unchanged to scanParameters().

    @return  Null if the type, the slash, or the subtype is missing;
    otherwise the result of scanParameters() called on the text following
    the subtype.
 */
sal_Unicode const * INetMIME::scanContentType(
    sal_Unicode const * pBegin, sal_Unicode const * pEnd, OUString * pType,
    OUString * pSubType, INetContentTypeParameterList * pParameters)
{
    // <type>: a non-empty run of token characters.
    sal_Unicode const * const pTypeBegin
        = skipLinearWhiteSpaceComment(pBegin, pEnd);
    sal_Unicode const * pTypeEnd = pTypeBegin;
    for (; pTypeEnd != pEnd && isTokenChar(*pTypeEnd); ++pTypeEnd)
    {
    }
    if (pTypeEnd == pTypeBegin)
        return nullptr;

    // The separating slash.
    sal_Unicode const * const pSlash
        = skipLinearWhiteSpaceComment(pTypeEnd, pEnd);
    if (pSlash == pEnd || *pSlash != '/')
        return nullptr;

    // <subtype>: again a non-empty run of token characters.
    sal_Unicode const * const pSubTypeBegin
        = skipLinearWhiteSpaceComment(pSlash + 1, pEnd);
    sal_Unicode const * pSubTypeEnd = pSubTypeBegin;
    for (; pSubTypeEnd != pEnd && isTokenChar(*pSubTypeEnd); ++pSubTypeEnd)
    {
    }
    if (pSubTypeEnd == pSubTypeBegin)
        return nullptr;

    // Only hand out results once both parts have been scanned successfully.
    if (pType != nullptr)
        *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin)
                     .toAsciiLowerCase();
    if (pSubType != nullptr)
        *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
                        .toAsciiLowerCase();

    return scanParameters(pSubTypeEnd, pEnd, pParameters);
}
|
|
|
|
// static
|
|
/** Decode the body of a header field that may contain encoded words.

    Every well-formed encoded word ("=?charset[*language]?B|Q?text?=") whose
    charset is known and accepted by isMIMECharsetEncoding() is replaced by
    its decoded text.  White space between two directly adjacent encoded
    words is dropped.  Anything that fails to parse as an encoded word is
    copied like ordinary text.  Outside of encoded words, sequences accepted
    by translateUTF8Char() are converted to the corresponding character; all
    other octets are appended via appendISO88591().

    @param rBody  The raw header field body.

    @return  The decoded header field body.
 */
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
    // versions of StarOffice send mails with header fields where encoded
    // words can be preceded by '=', ',', '.', '"', or '(', and followed by
    // '=', ',', '.', '"', ')', without any required white space in between.
    // And there appear to exist some broken mailers that only encode single
    // letters within words, like "Appel
    // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.

    // Non US-ASCII characters in rBody are treated as ISO-8859-1.

    // encoded-word = "=?"
    //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %30-39 / %x41-5A / %x5E-7E)
    //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
    //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
    //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
    //     "?="

    // base64 = ALPHA / DIGIT / "+" / "/"

    const sal_Char * pBegin = rBody.getStr();
    const sal_Char * pEnd = pBegin + rBody.getLength();

    OUString sDecoded;
    // Start of the not yet appended plain text.
    const sal_Char * pCopyBegin = pBegin;

    /* bool bStartEncodedWord = true; */
    // Start of the white space run (if any) directly following the most
    // recently decoded encoded word; such white space is dropped if another
    // encoded word follows immediately.
    const sal_Char * pWSPBegin = pBegin;

    for (const sal_Char * p = pBegin; p != pEnd;)
    {
        if (p != pEnd && *p == '=' /* && bStartEncodedWord */)
        {
            // q scans ahead tentatively; p only advances once the complete
            // encoded word has been accepted.
            const sal_Char * q = p + 1;
            bool bEncodedWord = q != pEnd && *q++ == '?';

            // Charset, optionally followed by "*" and a language tag.
            rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bEncodedWord)
            {
                const sal_Char * pCharsetBegin = q;
                const sal_Char * pLanguageBegin = nullptr;
                int nAlphaCount = 0;
                for (bool bDone = false; !bDone;)
                {
                    if (q == pEnd)
                    {
                        bEncodedWord = false;
                        bDone = true;
                    }
                    else
                    {
                        sal_Char cChar = *q++;
                        switch (cChar)
                        {
                            case '*':
                                pLanguageBegin = q - 1;
                                nAlphaCount = 0;
                                break;

                            case '-':
                                if (pLanguageBegin != nullptr)
                                {
                                    if (nAlphaCount == 0)
                                        pLanguageBegin = nullptr;
                                    else
                                        nAlphaCount = 0;
                                }
                                break;

                            case '?':
                                if (pCharsetBegin == q - 1)
                                    bEncodedWord = false;
                                else
                                {
                                    // The charset name ends at a valid
                                    // language tag, or else at the '?'.
                                    eCharsetEncoding
                                        = getCharsetEncoding(
                                            pCharsetBegin,
                                            pLanguageBegin == nullptr
                                            || nAlphaCount == 0 ?
                                                q - 1 : pLanguageBegin);
                                    bEncodedWord = isMIMECharsetEncoding(
                                                       eCharsetEncoding);
                                    eCharsetEncoding
                                        = translateFromMIME(eCharsetEncoding);
                                }
                                bDone = true;
                                break;

                            default:
                                if (pLanguageBegin != nullptr
                                    && (!rtl::isAsciiAlpha(cChar)
                                        || ++nAlphaCount > 8))
                                    pLanguageBegin = nullptr;
                                break;
                        }
                    }
                }
            }

            // Encoding: "B" (base64) or "Q", case-insensitive.
            bool bEncodingB = false;
            if (bEncodedWord)
            {
                if (q == pEnd)
                    bEncodedWord = false;
                else
                {
                    switch (*q++)
                    {
                        case 'B':
                        case 'b':
                            bEncodingB = true;
                            break;

                        case 'Q':
                        case 'q':
                            bEncodingB = false;
                            break;

                        default:
                            bEncodedWord = false;
                            break;
                    }
                }
            }

            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';

            // Encoded text, decoded into raw octets of the charset.
            OStringBuffer sText;
            if (bEncodedWord)
            {
                if (bEncodingB)
                {
                    for (bool bDone = false; !bDone;)
                    {
                        if (pEnd - q < 4)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            bool bFinal = false;
                            int nCount = 3;
                            sal_uInt32 nValue = 0;
                            for (int nShift = 18; nShift >= 0; nShift -= 6)
                            {
                                int nWeight = getBase64Weight(*q++);
                                if (nWeight == -2)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                if (nWeight == -1)
                                {
                                    if (!bFinal)
                                    {
                                        // Padding is only allowed in the
                                        // last two positions of a quad.
                                        if (nShift >= 12)
                                        {
                                            bEncodedWord = false;
                                            bDone = true;
                                            break;
                                        }
                                        bFinal = true;
                                        nCount = nShift == 6 ? 1 : 2;
                                    }
                                }
                                else
                                    nValue |= nWeight << nShift;
                            }
                            if (bEncodedWord)
                            {
                                for (int nShift = 16; nCount-- > 0; nShift -= 8)
                                    sText.append(sal_Char(nValue >> nShift & 0xFF));
                                // Check the bound explicitly instead of
                                // relying on the terminating NUL of rBody.
                                if (q != pEnd && *q == '?')
                                {
                                    ++q;
                                    bDone = true;
                                }
                                // A padded quad must be the last one.
                                if (bFinal && !bDone)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    const sal_Char * pEncodedTextBegin = q;
                    // Start of the literal run not yet copied to sText.
                    const sal_Char * pEncodedTextCopyBegin = q;
                    for (bool bDone = false; !bDone;)
                    {
                        if (q == pEnd)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            sal_uInt32 nChar = *q++;
                            switch (nChar)
                            {
                                case '=':
                                {
                                    if (pEnd - q < 2)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    int nDigit1 = getHexWeight(q[0]);
                                    int nDigit2 = getHexWeight(q[1]);
                                    if (nDigit1 < 0 || nDigit2 < 0)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(sal_Char(nDigit1 << 4 | nDigit2));
                                    q += 2;
                                    pEncodedTextCopyBegin = q;
                                    break;
                                }

                                case '?':
                                    // The encoded text must not be empty.
                                    if (q - pEncodedTextBegin > 1)
                                        sText.append(rBody.copy(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin)));
                                    else
                                        bEncodedWord = false;
                                    bDone = true;
                                    break;

                                case '_':
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(' ');
                                    pEncodedTextCopyBegin = q;
                                    break;

                                default:
                                    if (!isVisible(nChar))
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                    }
                                    break;
                            }
                        }
                    }
                }
            }

            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';

            // Own the converted buffer so that it is released on every path,
            // including when building the OUString below throws.
            std::unique_ptr< sal_Unicode[] > pUnicodeBuffer;
            sal_Size nUnicodeSize = 0;
            if (bEncodedWord)
            {
                pUnicodeBuffer.reset(
                    convertToUnicode(sText.getStr(),
                                     sText.getStr() + sText.getLength(),
                                     eCharsetEncoding, nUnicodeSize));
                if (!pUnicodeBuffer)
                    bEncodedWord = false;
            }

            if (bEncodedWord)
            {
                // Flush the pending plain text, without any white space that
                // directly follows a preceding encoded word.
                appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
                sDecoded += OUString(
                    pUnicodeBuffer.get(),
                    static_cast< sal_Int32 >(nUnicodeSize));
                p = q;
                pCopyBegin = p;

                pWSPBegin = p;
                while (p != pEnd && isWhiteSpace(*p))
                    ++p;
                /* bStartEncodedWord = p != pWSPBegin; */
                continue;
            }
        }

        if (p == pEnd)
            break;

        switch (*p++)
        {
            case '"':
                /* bStartEncodedWord = true; */
                break;

            case '(':
                /* bStartEncodedWord = true; */
                break;

            case ')':
                /* bStartEncodedWord = false; */
                break;

            default:
            {
                const sal_Char * pUTF8Begin = p - 1;
                const sal_Char * pUTF8End = pUTF8Begin;
                sal_uInt32 nCharacter = 0;
                if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
                                      nCharacter))
                {
                    appendISO88591(sDecoded, pCopyBegin, p - 1);
                    sal_Unicode aUTF16Buf[2];
                    sal_Int32 nUTF16Len
                        = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
                    sDecoded += OUString(aUTF16Buf, nUTF16Len);
                    p = pUTF8End;
                    pCopyBegin = p;
                }
                /* bStartEncodedWord = false; */
                break;
            }
        }
        pWSPBegin = p;
    }

    appendISO88591(sDecoded, pCopyBegin, pEnd);
    return sDecoded;
}
|
|
|
|
void INetMIMEOutputSink::writeSequence(const sal_Char * pBegin,
|
|
const sal_Char * pEnd)
|
|
{
|
|
OSL_ENSURE(pBegin && pBegin <= pEnd,
|
|
"INetMIMEOutputSink::writeSequence(): Bad sequence");
|
|
|
|
m_aBuffer.append(pBegin, pEnd - pBegin);
|
|
}
|
|
|
|
void INetMIMEOutputSink::writeSequence(const sal_Char * pSequence)
|
|
{
|
|
sal_Size nLength = rtl_str_getLength(pSequence);
|
|
writeSequence(pSequence, pSequence + nLength);
|
|
}
|
|
|
|
void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin,
|
|
const sal_Unicode * pEnd)
|
|
{
|
|
assert(pBegin && pBegin <= pEnd &&
|
|
"INetMIMEOutputSink::writeSequence(): Bad sequence");
|
|
|
|
std::unique_ptr<sal_Char[]> pBufferBegin( new sal_Char[pEnd - pBegin] );
|
|
sal_Char * pBufferEnd = pBufferBegin.get();
|
|
while (pBegin != pEnd)
|
|
{
|
|
DBG_ASSERT(*pBegin < 256,
|
|
"INetMIMEOutputSink::writeSequence(): Bad octet");
|
|
*pBufferEnd++ = sal_Char(*pBegin++);
|
|
}
|
|
writeSequence(pBufferBegin.get(), pBufferEnd);
|
|
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|