office-gobmx/tools/source/inet/inetmime.cxx

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include <cstddef>
#include <limits>
#include <memory>

#include <osl/diagnose.h>
#include <rtl/ustring.hxx>
#include <rtl/strbuf.hxx>
#include <rtl/tencinfo.h>
#include <tools/inetmime.hxx>
#include <rtl/character.hxx>

namespace {

/** Check for US-ASCII white space character.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is a US-ASCII white space character (US-ASCII
    0x09 or 0x20).
 */
inline bool isWhiteSpace(sal_uInt32 nChar);

/** Check whether some character is valid within an RFC 2045 <token>.

    @param nChar  Some UCS-4 character.

    @return  True if nChar is valid within an RFC 2045 <token> (US-ASCII
    'A'--'Z', 'a'--'z', '0'--'9', '!', '#', '$', '%', '&', ''', '*', '+',
    '-', '.', '^', '_', '`', '{', '|', '}', or '~').
 */
bool isTokenChar(sal_uInt32 nChar);

/** Get the Base 64 digit weight of a US-ASCII character.

    @param nChar  Some UCS-4 character.

    @return  If nChar is a US-ASCII Base 64 digit character (US-ASCII
    'A'--'Z', 'a'--'z', '0'--'9', '+', or '/'), return the
    corresponding weight (0--63); if nChar is the US-ASCII Base 64 padding
    character (US-ASCII '='), return -1; otherwise, return -2.
 */
inline int getBase64Weight(sal_uInt32 nChar);

/** Check whether a sequence starts with a line folding.

    @param pBegin  Start of the sequence, must not be null.

    @param pEnd  End of the sequence, must not be before pBegin.

    @return  True if the sequence starts with CR, LF and one white space
    character (see isWhiteSpace()).
 */
inline bool startsWithLineFolding(const sal_Unicode * pBegin,
                                         const sal_Unicode * pEnd);

/** Skip a parenthesized (possibly nested) comment.

    @return  A pointer past the comment's closing ')', or pBegin if the
    sequence does not start with a complete comment.
 */
const sal_Unicode * skipComment(const sal_Unicode * pBegin,
                                       const sal_Unicode * pEnd);

/** Skip any run of white space, line foldings and comments.

    @return  A pointer to the first position that is none of these (pEnd
    if the whole sequence was skipped).
 */
const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode *
                                                           pBegin,
                                                       const sal_Unicode *
                                                           pEnd);

/** Skip a double-quoted string.

    @return  A pointer past the closing '"', or pBegin if the sequence does
    not start with a complete, well-formed quoted string.
 */
const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
                                            const sal_Unicode * pEnd);

/** Scan a sequence of ';'-introduced parameters.

    @param pParameters  If not null, receives the decoded parameters.

    @return  A pointer to where scanning stopped, or pBegin if the scanned
    parameters are rejected by parseParameters().
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                                          sal_Unicode const * pEnd,
                                          INetContentTypeParameterList *
                                              pParameters);

/** Adjust an encoding obtained from a MIME charset name.

    @return  On Windows, RTL_TEXTENCODING_MS_1252 if eEncoding is
    RTL_TEXTENCODING_ISO_8859_1; otherwise eEncoding unchanged.
 */
inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding
                                                     eEncoding);

/** Look up the text encoding for a charset name given as [pBegin, pEnd).

    NOTE(review): the definition is not next to this declaration; presumably
    the name is matched against aEncodingMap -- confirm there.
 */
rtl_TextEncoding getCharsetEncoding(const sal_Char * pBegin,
                                           const sal_Char * pEnd);

/** Forward to rtl_isOctetTextEncoding() for the given encoding. */
inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding);

/** Convert a byte sequence in the given encoding to UTF-16.

    @param rSize  Receives the number of UTF-16 code units produced.

    @return  A new[]-allocated buffer the caller has to delete[], or null
    if eEncoding is RTL_TEXTENCODING_DONTKNOW or the conversion reported an
    error.
 */
sal_Unicode * convertToUnicode(const sal_Char * pBegin,
                                      const sal_Char * pEnd,
                                      rtl_TextEncoding eEncoding,
                                      sal_Size & rSize);

/** Convert a UTF-16 sequence to bytes in the given encoding.

    @param rSize  Receives the number of bytes produced.

    @return  A new[]-allocated buffer the caller has to delete[], or null
    if eEncoding is RTL_TEXTENCODING_DONTKNOW or the conversion reported an
    error.
 */
sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
                                     const sal_Unicode * pEnd,
                                     rtl_TextEncoding eEncoding,
                                     sal_Size & rSize);

/** Write the UTF-8 encoding (one to six bytes) of nChar to rSink. */
void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar);

/** Decode one multi-byte UTF-8 sequence and translate it to eEncoding.

    @param rBegin  Start of the input; advanced past the sequence on
    success, left untouched on failure.

    @param rCharacter  Receives the translated character on success.

    @return  True on success, false otherwise.
 */
bool translateUTF8Char(const sal_Char *& rBegin,
                              const sal_Char * pEnd,
                              rtl_TextEncoding eEncoding,
                              sal_uInt32 & rCharacter);

/** Put the UTF-16 encoding of a UTF-32 character into a buffer.

    @param pBuffer  Points to a buffer, must not be null.

    @param nUTF32  An UTF-32 character, must be in the range 0..0x10FFFF.

    @return  A pointer past the UTF-16 characters put into the buffer
    (i.e., pBuffer + 1 or pBuffer + 2).
 */
inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
                                              sal_uInt32 nUTF32);

/** Tell whether a character is one of the two US-ASCII blanks.

    @param nChar  Some UCS-4 character.

    @return  True for horizontal tab (0x09) and space (0x20), false for
    everything else.
 */
inline bool isWhiteSpace(sal_uInt32 nChar)
{
    switch (nChar)
    {
        case '\t':
        case ' ':
            return true;

        default:
            return false;
    }
}

/** Map a character to its Base 64 digit value.

    @param nChar  Some UCS-4 character.

    @return  0--25 for 'A'--'Z', 26--51 for 'a'--'z', 52--61 for '0'--'9',
    62 for '+', 63 for '/', -1 for the padding character '=', and -2 for
    any other character.
 */
inline int getBase64Weight(sal_uInt32 nChar)
{
    if (rtl::isAsciiUpperCase(nChar))
        return int(nChar - 'A');
    if (rtl::isAsciiLowerCase(nChar))
        return int(nChar - 'a' + 26);
    if (rtl::isAsciiDigit(nChar))
        return int(nChar - '0' + 52);

    switch (nChar)
    {
        case '+':
            return 62;

        case '/':
            return 63;

        case '=': // padding
            return -1;

        default: // not a Base 64 character at all
            return -2;
    }
}

/** Tell whether [pBegin, pEnd) begins with a folded line break.

    @param pBegin  Start of the sequence, must not be null.

    @param pEnd  End of the sequence, must not be before pBegin.

    @return  True if at least three characters are available and they are
    CR, LF and a white space character, in that order.
 */
inline bool startsWithLineFolding(const sal_Unicode * pBegin,
                                  const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "startsWithLineFolding(): Bad sequence");

    // Too short to hold CR, LF and the continuation white space.
    if (pEnd - pBegin < 3)
        return false;

    // CR, LF
    if (pBegin[0] != 0x0D || pBegin[1] != 0x0A)
        return false;

    return isWhiteSpace(pBegin[2]);
}

/** Adjust an encoding that was derived from a MIME charset name.

    @param eEncoding  Some text encoding.

    @return  On Windows builds RTL_TEXTENCODING_MS_1252 when eEncoding is
    RTL_TEXTENCODING_ISO_8859_1; in every other case eEncoding unchanged.
 */
inline rtl_TextEncoding translateFromMIME(rtl_TextEncoding eEncoding)
{
#if defined(_WIN32)
    if (eEncoding == RTL_TEXTENCODING_ISO_8859_1)
        return RTL_TEXTENCODING_MS_1252;
#endif
    return eEncoding;
}

/** Tell whether an encoding qualifies as a MIME charset encoding.

    @param eEncoding  Some text encoding.

    @return  The result of rtl_isOctetTextEncoding() for eEncoding.
 */
inline bool isMIMECharsetEncoding(rtl_TextEncoding eEncoding)
{
    return rtl_isOctetTextEncoding(eEncoding);
}

/** Convert the bytes in [pBegin, pEnd) from eEncoding to UTF-16.

    The output buffer starts out with one code unit per input byte and is
    enlarged by a third (plus one) and the conversion restarted for as long
    as the converter reports that the destination is too small.

    @param rSize  Receives the number of UTF-16 code units written by the
    last conversion attempt.

    @return  A buffer allocated with new[] (to be released by the caller
    with delete[]), or null if eEncoding is RTL_TEXTENCODING_DONTKNOW or
    the converter reported any problem.
 */
sal_Unicode * convertToUnicode(const sal_Char * pBegin,
                               const sal_Char * pEnd,
                               rtl_TextEncoding eEncoding,
                               sal_Size & rSize)
{
    if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
        return nullptr;

    rtl_TextToUnicodeConverter hConverter
        = rtl_createTextToUnicodeConverter(eEncoding);
    rtl_TextToUnicodeContext hContext
        = rtl_createTextToUnicodeContext(hConverter);

    // Treat every kind of undecodable input as an error.
    const sal_uInt32 nFlags = RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
                              | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
                              | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR;
    const sal_Size nSourceBytes = pEnd - pBegin;

    sal_Unicode * pResult = nullptr;
    sal_uInt32 nInfo = 0;
    sal_Size nCapacity = nSourceBytes;
    while (true)
    {
        pResult = new sal_Unicode[nCapacity];
        sal_Size nConsumedBytes;
        rSize = rtl_convertTextToUnicode(hConverter, hContext, pBegin,
                                         nSourceBytes, pResult, nCapacity,
                                         nFlags, &nInfo, &nConsumedBytes);
        if (nInfo != RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
            break;

        // Start over from scratch with a larger buffer.
        delete[] pResult;
        rtl_resetTextToUnicodeContext(hConverter, hContext);
        nCapacity += nCapacity / 3 + 1;
    }

    rtl_destroyTextToUnicodeContext(hConverter, hContext);
    rtl_destroyTextToUnicodeConverter(hConverter);

    if (nInfo == 0)
        return pResult;

    delete[] pResult;
    return nullptr;
}

/** Convert the UTF-16 code units in [pBegin, pEnd) to bytes in eEncoding.

    The output buffer starts out with one byte per input code unit and is
    enlarged by a third (plus one) and the conversion restarted for as long
    as the converter reports that the destination is too small.

    @param rSize  Receives the number of bytes written by the last
    conversion attempt.

    @return  A buffer allocated with new[] (to be released by the caller
    with delete[]), or null if eEncoding is RTL_TEXTENCODING_DONTKNOW or
    the converter reported any problem.
 */
sal_Char * convertFromUnicode(const sal_Unicode * pBegin,
                              const sal_Unicode * pEnd,
                              rtl_TextEncoding eEncoding,
                              sal_Size & rSize)
{
    if (eEncoding == RTL_TEXTENCODING_DONTKNOW)
        return nullptr;

    rtl_UnicodeToTextConverter hConverter
        = rtl_createUnicodeToTextConverter(eEncoding);
    rtl_UnicodeToTextContext hContext
        = rtl_createUnicodeToTextContext(hConverter);

    const sal_uInt32 nFlags = RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
                              | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
                              | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE
                              | RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR;
    const sal_Size nSourceUnits = pEnd - pBegin;

    sal_Char * pResult = nullptr;
    sal_uInt32 nInfo = 0;
    sal_Size nCapacity = nSourceUnits;
    while (true)
    {
        pResult = new sal_Char[nCapacity];
        sal_Size nConsumedUnits;
        rSize = rtl_convertUnicodeToText(hConverter, hContext, pBegin,
                                         nSourceUnits, pResult, nCapacity,
                                         nFlags, &nInfo, &nConsumedUnits);
        if (nInfo != RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL)
            break;

        // Start over from scratch with a larger buffer.
        delete[] pResult;
        rtl_resetUnicodeToTextContext(hConverter, hContext);
        nCapacity += nCapacity / 3 + 1;
    }

    rtl_destroyUnicodeToTextContext(hConverter, hContext);
    rtl_destroyUnicodeToTextConverter(hConverter);

    if (nInfo == 0)
        return pResult;

    delete[] pResult;
    return nullptr;
}

/** Store a UTF-32 character as UTF-16 in a buffer.

    @param pBuffer  Destination with room for up to two code units, must
    not be null.

    @param nUTF32  The character to store, must be in the range 0..0x10FFFF.

    @return  A pointer past the code units written: pBuffer + 1 for
    characters below 0x10000, pBuffer + 2 (surrogate pair) otherwise.
 */
inline sal_Unicode * putUTF32Character(sal_Unicode * pBuffer,
                                       sal_uInt32 nUTF32)
{
    DBG_ASSERT(rtl::isUnicodeCodePoint(nUTF32), "putUTF32Character(): Bad char");

    if (nUTF32 < 0x10000)
    {
        pBuffer[0] = sal_Unicode(nUTF32);
        return pBuffer + 1;
    }

    // Split the 20 bit offset into a high and a low surrogate.
    const sal_uInt32 nOffset = nUTF32 - 0x10000;
    pBuffer[0] = sal_Unicode(0xD800 | (nOffset >> 10));
    pBuffer[1] = sal_Unicode(0xDC00 | (nOffset & 0x3FF));
    return pBuffer + 2;
}

/** Append the UTF-8 encoding of a character to a sink.

    Uses the original one to six byte scheme of RFC 2279, i.e. values up
    to 0x7FFFFFFF can be written.

    @param rSink  The sink the bytes are written to.

    @param nChar  The character to encode, must be below 0x80000000.
 */
void writeUTF8(INetMIMEOutputSink & rSink, sal_uInt32 nChar)
{
    // See RFC 2279 for a discussion of UTF-8.
    DBG_ASSERT(nChar < 0x80000000, "writeUTF8(): Bad char");

    if (nChar < 0x80)
    {
        // US-ASCII is written as is.
        rSink << sal_Char(nChar);
        return;
    }

    // Pick the number of continuation bytes and the marker bits of the
    // lead byte from the magnitude of the character.
    int nTrailing;
    sal_uInt32 nLeadMarker;
    if (nChar < 0x800)
    {
        nTrailing = 1;
        nLeadMarker = 0xC0;
    }
    else if (nChar < 0x10000)
    {
        nTrailing = 2;
        nLeadMarker = 0xE0;
    }
    else if (nChar < 0x200000)
    {
        nTrailing = 3;
        nLeadMarker = 0xF0;
    }
    else if (nChar < 0x4000000)
    {
        nTrailing = 4;
        nLeadMarker = 0xF8;
    }
    else
    {
        nTrailing = 5;
        nLeadMarker = 0xFC;
    }

    rSink << sal_Char((nChar >> (6 * nTrailing)) | nLeadMarker);

    // Each continuation byte carries six payload bits, most significant
    // group first.
    for (int nShift = 6 * (nTrailing - 1); nShift >= 0; nShift -= 6)
        rSink << sal_Char(((nChar >> nShift) & 0x3F) | 0x80);
}

bool translateUTF8Char(const sal_Char *& rBegin,
                                 const sal_Char * pEnd,
                                 rtl_TextEncoding eEncoding,
                                 sal_uInt32 & rCharacter)
{
    if (rBegin == pEnd || static_cast< unsigned char >(*rBegin) < 0x80
        || static_cast< unsigned char >(*rBegin) >= 0xFE)
        return false;

    int nCount;
    sal_uInt32 nMin;
    sal_uInt32 nUCS4;
    const sal_Char * p = rBegin;
    if (static_cast< unsigned char >(*p) < 0xE0)
    {
        nCount = 1;
        nMin = 0x80;
        nUCS4 = static_cast< unsigned char >(*p) & 0x1F;
    }
    else if (static_cast< unsigned char >(*p) < 0xF0)
    {
        nCount = 2;
        nMin = 0x800;
        nUCS4 = static_cast< unsigned char >(*p) & 0xF;
    }
    else if (static_cast< unsigned char >(*p) < 0xF8)
    {
        nCount = 3;
        nMin = 0x10000;
        nUCS4 = static_cast< unsigned char >(*p) & 7;
    }
    else if (static_cast< unsigned char >(*p) < 0xFC)
    {
        nCount = 4;
        nMin = 0x200000;
        nUCS4 = static_cast< unsigned char >(*p) & 3;
    }
    else
    {
        nCount = 5;
        nMin = 0x4000000;
        nUCS4 = static_cast< unsigned char >(*p) & 1;
    }
    ++p;

    for (; nCount-- > 0; ++p)
        if ((static_cast< unsigned char >(*p) & 0xC0) == 0x80)
            nUCS4 = (nUCS4 << 6) | (static_cast< unsigned char >(*p) & 0x3F);
        else
            return false;

    if (!rtl::isUnicodeCodePoint(nUCS4) || nUCS4 < nMin)
        return false;

    if (eEncoding >= RTL_TEXTENCODING_UCS4)
        rCharacter = nUCS4;
    else
    {
        sal_Unicode aUTF16[2];
        const sal_Unicode * pUTF16End = putUTF32Character(aUTF16, nUCS4);
        sal_Size nSize;
        sal_Char * pBuffer = convertFromUnicode(aUTF16, pUTF16End, eEncoding,
                                                nSize);
        if (!pBuffer)
            return false;
        DBG_ASSERT(nSize == 1,
                   "translateUTF8Char(): Bad conversion");
        rCharacter = *pBuffer;
        delete[] pBuffer;
    }
    rBegin = p;
    return true;
}

/** Append the bytes in [pBegin, pEnd) to rText, turning each byte into the
    UTF-16 code unit of the same (unsigned) value.
 */
void appendISO88591(OUString & rText, sal_Char const * pBegin,
                    sal_Char const * pEnd);

/** One node of the singly linked list built by scanParameters(): a single
    parameter, or a single section of a parameter that is split into
    several sections.
 */
struct Parameter
{
    Parameter * m_pNext;   // next node of the list, or null
    OString m_aAttribute;  // attribute name as stored by the scanner
    OString m_aCharset;    // charset name, empty if none was given
    OString m_aValue;      // raw value bytes of this section
    sal_uInt32 m_nSection; // section number (0 for the first/only one)
    bool m_bExtended;      // whether this section used extended syntax

    /** Initialize every member from the corresponding argument. */
    Parameter(Parameter * pTheNext, const OString& rTheAttribute,
              const OString& rTheCharset, const OString& rTheValue,
              sal_uInt32 nTheSection, bool bTheExtended)
        : m_pNext(pTheNext)
        , m_aAttribute(rTheAttribute)
        , m_aCharset(rTheCharset)
        , m_aValue(rTheValue)
        , m_nSection(nTheSection)
        , m_bExtended(bTheExtended)
    {}
};

struct ParameterList
{
    Parameter * m_pList;

    ParameterList(): m_pList(nullptr) {}

    inline ~ParameterList();

    Parameter ** find(const OString& rAttribute, sal_uInt32 nSection,
                      bool & rPresent);
};

inline ParameterList::~ParameterList()
{
    while (m_pList)
    {
        Parameter * pNext = m_pList->m_pNext;
        delete m_pList;
        m_pList = pNext;
    }
}

/** Check the section numbering of a scanned parameter list and, if pOutput
    is not null, store the decoded parameters there.

    @return  False if the section numbering is inconsistent, true otherwise.
 */
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput);

//  appendISO88591

/** Append a run of ISO 8859-1 bytes to a string.

    @param rText  The string that is extended.

    @param pBegin  Start of the bytes to append.

    @param pEnd  End of the bytes to append, must not be before pBegin.
 */
void appendISO88591(OUString & rText, sal_Char const * pBegin,
                    sal_Char const * pEnd)
{
    const sal_Int32 nCount = pEnd - pBegin;
    std::unique_ptr<sal_Unicode[]> pWide(new sal_Unicode[nCount]);
    // Each byte is taken as an unsigned value and widened unchanged.
    for (sal_Int32 i = 0; i < nCount; ++i)
        pWide[i] = static_cast<unsigned char>(pBegin[i]);
    rText += OUString(pWide.get(), nCount);
}

//  ParameterList

/** Locate the node for an attribute/section pair, or the position at which
    such a node has to be inserted.

    Nodes with the same attribute are adjacent and ordered by ascending
    section number, which is what parseParameters() relies on: it requires
    section n - 1 to directly precede section n and starts decoding a value
    at its section 0.  (Sections used to be kept in descending order, which
    made parseParameters() reject every parameter that was split into
    several sections.)

    @param rAttribute  The attribute name to look for.

    @param nSection  The section number to look for.

    @param rPresent  Set to true if a matching node exists, to false
    otherwise.

    @return  If rPresent is true, the address of the link that points to
    the matching node.  Otherwise the address of the link at which a new
    node for rAttribute/nSection has to be inserted (assigning a new node
    whose m_pNext is the link's old value keeps the list ordered).
 */
Parameter ** ParameterList::find(const OString& rAttribute,
                                 sal_uInt32 nSection, bool & rPresent)
{
    Parameter ** p = &m_pList;
    for (; *p; p = &(*p)->m_pNext)
    {
        sal_Int32 nCompare = rAttribute.compareTo((*p)->m_aAttribute);
        if (nCompare > 0)
            break;
        else if (nCompare == 0)
        {
            // Within one attribute, keep the sections in ascending order.
            if (nSection < (*p)->m_nSection)
                break;
            else if (nSection == (*p)->m_nSection)
            {
                rPresent = true;
                return p;
            }
        }
    }
    rPresent = false;
    return p;
}

//  parseParameters

/** Validate a scanned parameter list and optionally decode it.

    First every node with a section number above zero is checked to follow
    directly the node with the preceding section number of the same
    attribute.  Then, if pOutput is not null, the sections of each
    parameter are decoded, concatenated and inserted into *pOutput.

    A section is decoded with the parameter's charset if one was given and
    the parameter's first section is extended; otherwise as UTF-8 with a
    fallback to ISO 8859-1.  If a section cannot be decoded, the value is
    rebuilt from the raw bytes of all sections instead: bytes of extended
    sections are mapped to 0xF800 | byte, bytes of other sections to the
    code unit of the same value.

    @param rInput  The list produced by the scanner.

    @param pOutput  If not null, it is cleared first and then receives the
    decoded parameters (a duplicate attribute is dropped and logged).

    @return  False if the section numbering is inconsistent (pOutput, if
    any, is then left empty), true otherwise.
 */
bool parseParameters(ParameterList const & rInput,
                     INetContentTypeParameterList * pOutput)
{
    if (pOutput)
        pOutput->clear();

    // Continuation sections are only valid directly behind their
    // predecessor section of the same attribute.
    Parameter * pPrev = nullptr;
    for (Parameter * p = rInput.m_pList; p; p = p->m_pNext)
    {
        if (p->m_nSection > 0
            && (!pPrev
                || pPrev->m_nSection != p->m_nSection - 1
                || pPrev->m_aAttribute != p->m_aAttribute))
            return false;
        pPrev = p;
    }

    if (pOutput)
        for (Parameter * p = rInput.m_pList; p;)
        {
            bool bCharset = !p->m_aCharset.isEmpty();
            rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bCharset)
                // The end pointer has to be computed from the length of
                // this parameter's own charset name, not from that of the
                // first parameter in the list.
                eEncoding = getCharsetEncoding(
                    p->m_aCharset.getStr(),
                    p->m_aCharset.getStr() + p->m_aCharset.getLength());
            const bool bUseCharset = bCharset && p->m_bExtended;

            OUString aValue;
            bool bBadEncoding = false;
            Parameter * pNext = p;
            // Decode and concatenate section 0 and all its continuations.
            do
            {
                sal_Size nSize;
                std::unique_ptr<sal_Unicode[]> pUnicode(
                    convertToUnicode(
                        pNext->m_aValue.getStr(),
                        pNext->m_aValue.getStr()
                            + pNext->m_aValue.getLength(),
                        bUseCharset ? eEncoding : RTL_TEXTENCODING_UTF8,
                        nSize));
                if (!pUnicode && !bUseCharset)
                    // Not valid UTF-8: fall back to ISO 8859-1.
                    pUnicode.reset(
                        convertToUnicode(
                            pNext->m_aValue.getStr(),
                            pNext->m_aValue.getStr()
                                + pNext->m_aValue.getLength(),
                            RTL_TEXTENCODING_ISO_8859_1, nSize));
                if (!pUnicode)
                {
                    bBadEncoding = true;
                    break;
                }
                aValue += OUString(pUnicode.get(),
                                   static_cast<sal_Int32>(nSize));
                pNext = pNext->m_pNext;
            }
            while (pNext && pNext->m_nSection > 0);

            if (bBadEncoding)
            {
                // Rebuild the whole value from the raw bytes of all its
                // sections; this also moves pNext to the next parameter.
                aValue.clear();
                for (pNext = p;;)
                {
                    if (pNext->m_bExtended)
                    {
                        for (sal_Int32 i = 0; i < pNext->m_aValue.getLength(); ++i)
                            aValue += OUStringLiteral1(
                                sal_Unicode(
                                    static_cast<unsigned char>(pNext->m_aValue[i]))
                                | 0xF800);
                    }
                    else
                    {
                        for (sal_Int32 i = 0; i < pNext->m_aValue.getLength(); ++i)
                            aValue += OUStringLiteral1( static_cast<unsigned char>(pNext->m_aValue[i]) );
                    }
                    pNext = pNext->m_pNext;
                    if (!pNext || pNext->m_nSection == 0)
                        break;
                }
            }
            INetContentTypeParameter x {aValue}; // workaround ICE in VisualStudio2013
            auto const ret = pOutput->insert({p->m_aAttribute, x });
            SAL_INFO_IF(!ret.second, "tools",
                "INetMIME: dropping duplicate parameter: " << p->m_aAttribute);
            p = pNext;
        }
    return true;
}

/** Tell whether a character may appear in an RFC 2045 <token>.

    @param nChar  Some UCS-4 character.

    @return  True for US-ASCII letters and digits and for '!', '#', '$',
    '%', '&', ''', '*', '+', '-', '.', '^', '_', '`', '{', '|', '}' and
    '~'; false for every other character, including all non-US-ASCII ones.
 */
bool isTokenChar(sal_uInt32 nChar)
{
    if (!rtl::isAscii(nChar))
        return false;
    if (rtl::isAsciiAlpha(nChar) || rtl::isAsciiDigit(nChar))
        return true;

    // What remains are controls, space, DEL and punctuation; only the
    // punctuation listed here is allowed.
    switch (nChar)
    {
        case '!':
        case '#':
        case '$':
        case '%':
        case '&':
        case '\'':
        case '*':
        case '+':
        case '-':
        case '.':
        case '^':
        case '_':
        case '`':
        case '{':
        case '|':
        case '}':
        case '~':
            return true;

        default:
            return false;
    }
}

/** Skip one parenthesized comment at the start of a sequence.

    Comments may nest, and a backslash makes the following character
    lose any special meaning.

    @param pBegin  Start of the sequence, must not be null.

    @param pEnd  End of the sequence, must not be before pBegin.

    @return  A pointer past the ')' that closes the comment, or pBegin if
    the sequence does not start with '(' or the comment is not closed
    before pEnd.
 */
const sal_Unicode * skipComment(const sal_Unicode * pBegin,
                                const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipComment(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '(')
        return pBegin;

    sal_uInt32 nDepth = 0;
    const sal_Unicode * pCurrent = pBegin;
    while (pCurrent != pEnd)
    {
        const sal_Unicode cChar = *pCurrent++;
        if (cChar == '(')
            ++nDepth;
        else if (cChar == ')')
        {
            if (--nDepth == 0)
                return pCurrent;
        }
        else if (cChar == '\\' && pCurrent != pEnd)
            ++pCurrent; // skip the escaped character
    }

    // Ran out of input inside the comment.
    return pBegin;
}

/** Skip any mix of white space, line foldings and comments.

    @param pBegin  Start of the sequence, must not be null.

    @param pEnd  End of the sequence, must not be before pBegin.

    @return  A pointer to the first position that does not start a white
    space character, a line folding or a complete comment (pEnd if there
    is no such position).
 */
const sal_Unicode * skipLinearWhiteSpaceComment(const sal_Unicode * pBegin,
                                                const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipLinearWhiteSpaceComment(): Bad sequence");

    const sal_Unicode * pCurrent = pBegin;
    while (pCurrent != pEnd)
    {
        const sal_Unicode cChar = *pCurrent;
        if (cChar == '\t' || cChar == ' ')
            ++pCurrent;
        else if (cChar == 0x0D // CR
                 && startsWithLineFolding(pCurrent, pEnd))
            pCurrent += 3;
        else if (cChar == '(')
        {
            const sal_Unicode * pAfterComment = skipComment(pCurrent, pEnd);
            if (pAfterComment == pCurrent)
                break; // unterminated comment
            pCurrent = pAfterComment;
        }
        else
            break;
    }
    return pCurrent;
}

/** Skip one double-quoted string at the start of a sequence.

    Inside the string a backslash makes the following character lose any
    special meaning, and a CR is only accepted as part of a line folding
    (CR, LF, white space).

    @param pBegin  Start of the sequence, must not be null.

    @param pEnd  End of the sequence, must not be before pBegin.

    @return  A pointer past the closing '"', or pBegin if the sequence does
    not start with '"', contains a CR that does not start a line folding,
    or is not closed before pEnd.
 */
const sal_Unicode * skipQuotedString(const sal_Unicode * pBegin,
                                     const sal_Unicode * pEnd)
{
    DBG_ASSERT(pBegin && pBegin <= pEnd,
               "skipQuotedString(): Bad sequence");

    if (pBegin == pEnd || *pBegin != '"')
        return pBegin;

    const sal_Unicode * pCurrent = pBegin + 1;
    while (pCurrent != pEnd)
    {
        const sal_Unicode cChar = *pCurrent++;
        if (cChar == '"')
            return pCurrent;

        if (cChar == '\\')
        {
            if (pCurrent != pEnd)
                ++pCurrent; // skip the escaped character
        }
        else if (cChar == 0x0D) // CR
        {
            if (pEnd - pCurrent < 2 || pCurrent[0] != 0x0A // LF
                || !isWhiteSpace(pCurrent[1]))
                return pBegin;
            pCurrent += 2;
        }
    }

    // Ran out of input before the closing quote.
    return pBegin;
}

/** Scan a sequence of ';'-introduced "attribute=value" parameters.

    Each parameter that is scanned successfully is stored as a node in a
    local ParameterList; scanning stops at the first position that does not
    start a well-formed parameter.  The collected list is then handed to
    parseParameters().

    @param pBegin  Start of the sequence to scan.

    @param pEnd  End of the sequence to scan.

    @param pParameters  If not null, receives the decoded parameters (via
    parseParameters()).  If null, values are only skipped, not collected.

    @return  If parseParameters() accepts the collected list, a pointer to
    the position (after leading white space and comments) at which the
    first parameter that was not scanned completely would start; pBegin
    otherwise.
 */
sal_Unicode const * scanParameters(sal_Unicode const * pBegin,
                                             sal_Unicode const * pEnd,
                                             INetContentTypeParameterList *
                                                 pParameters)
{
    ParameterList aList;
    sal_Unicode const * pParameterBegin = pBegin;
    // Every `break` in this loop abandons the parameter currently being
    // scanned; pParameterBegin then still points in front of it.
    for (sal_Unicode const * p = pParameterBegin;;)
    {
        // A parameter is introduced by ';'.
        pParameterBegin = skipLinearWhiteSpaceComment(p, pEnd);
        if (pParameterBegin == pEnd || *pParameterBegin != ';')
            break;
        p = pParameterBegin + 1;

        // Attribute name: a non-empty run of token characters up to (and
        // excluding) any '*', stored in lower case.
        sal_Unicode const * pAttributeBegin
            = skipLinearWhiteSpaceComment(p, pEnd);
        p = pAttributeBegin;
        bool bDowncaseAttribute = false;
        while (p != pEnd && isTokenChar(*p) && *p != '*')
        {
            bDowncaseAttribute = bDowncaseAttribute || rtl::isAsciiUpperCase(*p);
            ++p;
        }
        if (p == pAttributeBegin)
            break;
        OString aAttribute = OString(
            pAttributeBegin, p - pAttributeBegin,
            RTL_TEXTENCODING_ASCII_US);
        if (bDowncaseAttribute)
            aAttribute = aAttribute.toAsciiLowerCase();

        // Optional "*<number>" section marker.  INetMIME::scanUnsigned()
        // is defined elsewhere; presumably it parses a decimal number into
        // nSection and advances p -- TODO confirm.
        // NOTE(review): for "attr*=value" (no digits) the single '*' is
        // consumed here, so the extended-syntax check further down does
        // not see it and the value is treated as not extended -- confirm
        // that this is intended.
        sal_uInt32 nSection = 0;
        if (p != pEnd && *p == '*')
        {
            ++p;
            if (p != pEnd && rtl::isAsciiDigit(*p)
                && !INetMIME::scanUnsigned(p, pEnd, false, nSection))
                break;
        }

        // The same attribute/section pair must not occur twice.  pPos is
        // the link at which the new node is inserted at the end of this
        // iteration.
        bool bPresent;
        Parameter ** pPos = aList.find(aAttribute, nSection, bPresent);
        if (bPresent)
            break;

        // A '*' at this position marks the value as using extended syntax.
        bool bExtended = false;
        if (p != pEnd && *p == '*')
        {
            ++p;
            bExtended = true;
        }

        p = skipLinearWhiteSpaceComment(p, pEnd);

        if (p == pEnd || *p != '=')
            break;

        p = skipLinearWhiteSpaceComment(p + 1, pEnd);

        OString aCharset;
        OString aLanguage;
        OString aValue;
        if (bExtended)
        {
            // Only section 0 carries the "charset'language'" prefix.
            if (nSection == 0)
            {
                // Charset: non-empty run of token characters up to the
                // first apostrophe, stored in lower case.
                sal_Unicode const * pCharsetBegin = p;
                bool bDowncaseCharset = false;
                while (p != pEnd && isTokenChar(*p) && *p != '\'')
                {
                    bDowncaseCharset = bDowncaseCharset || rtl::isAsciiUpperCase(*p);
                    ++p;
                }
                if (p == pCharsetBegin)
                    break;
                if (pParameters)
                {
                    aCharset = OString(
                        pCharsetBegin,
                        p - pCharsetBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseCharset)
                        aCharset = aCharset.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;

                // Language: '-'-separated groups of one to eight US-ASCII
                // letters each.  (aLanguage is filled in but not used
                // further within this function.)
                sal_Unicode const * pLanguageBegin = p;
                bool bDowncaseLanguage = false;
                int nLetters = 0;
                for (; p != pEnd; ++p)
                    if (rtl::isAsciiAlpha(*p))
                    {
                        if (++nLetters > 8)
                            break;
                        bDowncaseLanguage = bDowncaseLanguage
                                            || rtl::isAsciiUpperCase(*p);
                    }
                    else if (*p == '-')
                    {
                        if (nLetters == 0)
                            break;
                        nLetters = 0;
                    }
                    else
                        break;
                if (nLetters == 0 || nLetters > 8)
                    break;
                if (pParameters)
                {
                    aLanguage = OString(
                        pLanguageBegin,
                        p - pLanguageBegin,
                        RTL_TEXTENCODING_ASCII_US);
                    if (bDowncaseLanguage)
                        aLanguage = aLanguage.toAsciiLowerCase();
                }

                if (p == pEnd || *p != '\'')
                    break;
                ++p;
            }
            if (pParameters)
            {
                // Collect the value bytes: "%XX" escapes are turned into
                // the byte they denote, everything else is written as
                // UTF-8.  INetMIME::getUTF32Character() is defined
                // elsewhere; presumably it returns the character at p and
                // advances p past it -- TODO confirm.
                INetMIMEOutputSink aSink;
                while (p != pEnd)
                {
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (rtl::isAscii(nChar) && !isTokenChar(nChar))
                        break;
                    if (nChar == '%' && p + 1 < pEnd)
                    {
                        int nWeight1 = INetMIME::getHexWeight(p[0]);
                        int nWeight2 = INetMIME::getHexWeight(p[1]);
                        if (nWeight1 >= 0 && nWeight2 >= 0)
                        {
                            aSink << sal_Char(nWeight1 << 4 | nWeight2);
                            p += 2;
                            continue;
                        }
                    }
                    writeUTF8(aSink, nChar);
                }
                aValue = aSink.takeBuffer();
            }
            else
                // Nothing to collect: just skip the value characters.
                while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                    ++p;
        }
        else if (p != pEnd && *p == '"')
            // Quoted-string value.
            if (pParameters)
            {
                // Collect the content between the quotes as UTF-8,
                // resolving backslash escapes and line foldings.
                INetMIMEOutputSink aSink;
                bool bInvalid = false;
                for (++p;;)
                {
                    if (p == pEnd)
                    {
                        // Missing closing quote.
                        bInvalid = true;
                        break;
                    }
                    sal_uInt32 nChar = INetMIME::getUTF32Character(p, pEnd);
                    if (nChar == '"')
                        break;
                    else if (nChar == 0x0D) // CR
                    {
                        // Only valid as part of a line folding, of which
                        // just the white space character is kept.
                        if (pEnd - p < 2 || *p++ != 0x0A // LF
                            || !isWhiteSpace(*p))
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = static_cast<unsigned char>(*p++);
                    }
                    else if (nChar == '\\')
                    {
                        // The escaped character is taken literally.
                        if (p == pEnd)
                        {
                            bInvalid = true;
                            break;
                        }
                        nChar = INetMIME::getUTF32Character(p, pEnd);
                    }
                    writeUTF8(aSink, nChar);
                }
                if (bInvalid)
                    break;
                aValue = aSink.takeBuffer();
            }
            else
            {
                // Nothing to collect: just skip the quoted string.
                sal_Unicode const * pStringEnd = skipQuotedString(p, pEnd);
                if (p == pStringEnd)
                    break;
                p = pStringEnd;
            }
        else
        {
            // Plain value: a non-empty run of token characters and
            // non-US-ASCII characters.
            sal_Unicode const * pTokenBegin = p;
            while (p != pEnd && (isTokenChar(*p) || !rtl::isAscii(*p)))
                ++p;
            if (p == pTokenBegin)
                break;
            if (pParameters)
                aValue = OString(
                    pTokenBegin, p - pTokenBegin,
                    RTL_TEXTENCODING_UTF8);
        }

        // Insert the new node at the position determined by find() above;
        // aList owns it from here on.
        *pPos = new Parameter(*pPos, aAttribute, aCharset, aValue,
                              nSection, bExtended);
    }
    return parseParameters(aList, pParameters) ? pParameterBegin : pBegin;
}

/** Compare a character range with a null-terminated string, ignoring the
    case of US-ASCII letters.

    @param pBegin1  Start of the range, must not be null.

    @param pEnd1  End of the range, must not be before pBegin1.

    @param pString2  A null-terminated string, must not be null.

    @return  True if both have the same length and are equal once every
    character has been passed through rtl::toAsciiUpperCase().
 */
bool equalIgnoreCase(const sal_Char * pBegin1,
                     const sal_Char * pEnd1,
                     const sal_Char * pString2)
{
    DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
               "equalIgnoreCase(): Bad sequences");

    for (; *pString2 != 0; ++pBegin1, ++pString2)
    {
        // The range is shorter than the string.
        if (pBegin1 == pEnd1)
            return false;
        if (rtl::toAsciiUpperCase(*pBegin1) != rtl::toAsciiUpperCase(*pString2))
            return false;
    }

    // Equal only if the range has been used up, too.
    return pBegin1 == pEnd1;
}

/** One row of a table that associates a charset name with a text
    encoding (see aEncodingMap).
 */
struct EncodingEntry
{
    sal_Char const * m_aName; // charset name, e.g. "US-ASCII"
    rtl_TextEncoding m_eEncoding; // the encoding this name stands for
};

// The source for the following table is <ftp://ftp.iana.org/in-notes/iana/
// assignments/character-sets> as of Jan 21, 2000 12:46:00, unless otherwise
// noted:
static EncodingEntry const aEncodingMap[]
    = { { "US-ASCII", RTL_TEXTENCODING_ASCII_US },
        { "ANSI_X3.4-1968", RTL_TEXTENCODING_ASCII_US },
        { "ISO-IR-6", RTL_TEXTENCODING_ASCII_US },
        { "ANSI_X3.4-1986", RTL_TEXTENCODING_ASCII_US },
        { "ISO_646.IRV:1991", RTL_TEXTENCODING_ASCII_US },
        { "ASCII", RTL_TEXTENCODING_ASCII_US },
        { "ISO646-US", RTL_TEXTENCODING_ASCII_US },
        { "US", RTL_TEXTENCODING_ASCII_US },
        { "IBM367", RTL_TEXTENCODING_ASCII_US },
        { "CP367", RTL_TEXTENCODING_ASCII_US },
        { "CSASCII", RTL_TEXTENCODING_ASCII_US },
        { "ISO-8859-1", RTL_TEXTENCODING_ISO_8859_1 },
        { "ISO_8859-1:1987", RTL_TEXTENCODING_ISO_8859_1 },
        { "ISO-IR-100", RTL_TEXTENCODING_ISO_8859_1 },
        { "ISO_8859-1", RTL_TEXTENCODING_ISO_8859_1 },
        { "LATIN1", RTL_TEXTENCODING_ISO_8859_1 },
        { "L1", RTL_TEXTENCODING_ISO_8859_1 },
        { "IBM819", RTL_TEXTENCODING_ISO_8859_1 },
        { "CP819", RTL_TEXTENCODING_ISO_8859_1 },
        { "CSISOLATIN1", RTL_TEXTENCODING_ISO_8859_1 },
        { "ISO-8859-2", RTL_TEXTENCODING_ISO_8859_2 },
        { "ISO_8859-2:1987", RTL_TEXTENCODING_ISO_8859_2 },
        { "ISO-IR-101", RTL_TEXTENCODING_ISO_8859_2 },
        { "ISO_8859-2", RTL_TEXTENCODING_ISO_8859_2 },
        { "LATIN2", RTL_TEXTENCODING_ISO_8859_2 },
        { "L2", RTL_TEXTENCODING_ISO_8859_2 },
        { "CSISOLATIN2", RTL_TEXTENCODING_ISO_8859_2 },
        { "ISO-8859-3", RTL_TEXTENCODING_ISO_8859_3 },
        { "ISO_8859-3:1988", RTL_TEXTENCODING_ISO_8859_3 },
        { "ISO-IR-109", RTL_TEXTENCODING_ISO_8859_3 },
        { "ISO_8859-3", RTL_TEXTENCODING_ISO_8859_3 },
        { "LATIN3", RTL_TEXTENCODING_ISO_8859_3 },
        { "L3", RTL_TEXTENCODING_ISO_8859_3 },
        { "CSISOLATIN3", RTL_TEXTENCODING_ISO_8859_3 },
        { "ISO-8859-4", RTL_TEXTENCODING_ISO_8859_4 },
        { "ISO_8859-4:1988", RTL_TEXTENCODING_ISO_8859_4 },
        { "ISO-IR-110", RTL_TEXTENCODING_ISO_8859_4 },
        { "ISO_8859-4", RTL_TEXTENCODING_ISO_8859_4 },
        { "LATIN4", RTL_TEXTENCODING_ISO_8859_4 },
        { "L4", RTL_TEXTENCODING_ISO_8859_4 },
        { "CSISOLATIN4", RTL_TEXTENCODING_ISO_8859_4 },
        { "ISO-8859-5", RTL_TEXTENCODING_ISO_8859_5 },
        { "ISO_8859-5:1988", RTL_TEXTENCODING_ISO_8859_5 },
        { "ISO-IR-144", RTL_TEXTENCODING_ISO_8859_5 },
        { "ISO_8859-5", RTL_TEXTENCODING_ISO_8859_5 },
        { "CYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
        { "CSISOLATINCYRILLIC", RTL_TEXTENCODING_ISO_8859_5 },
        { "ISO-8859-6", RTL_TEXTENCODING_ISO_8859_6 },
        { "ISO_8859-6:1987", RTL_TEXTENCODING_ISO_8859_6 },
        { "ISO-IR-127", RTL_TEXTENCODING_ISO_8859_6 },
        { "ISO_8859-6", RTL_TEXTENCODING_ISO_8859_6 },
        { "ECMA-114", RTL_TEXTENCODING_ISO_8859_6 },
        { "ASMO-708", RTL_TEXTENCODING_ISO_8859_6 },
        { "ARABIC", RTL_TEXTENCODING_ISO_8859_6 },
        { "CSISOLATINARABIC", RTL_TEXTENCODING_ISO_8859_6 },
        { "ISO-8859-7", RTL_TEXTENCODING_ISO_8859_7 },
        { "ISO_8859-7:1987", RTL_TEXTENCODING_ISO_8859_7 },
        { "ISO-IR-126", RTL_TEXTENCODING_ISO_8859_7 },
        { "ISO_8859-7", RTL_TEXTENCODING_ISO_8859_7 },
        { "ELOT_928", RTL_TEXTENCODING_ISO_8859_7 },
        { "ECMA-118", RTL_TEXTENCODING_ISO_8859_7 },
        { "GREEK", RTL_TEXTENCODING_ISO_8859_7 },
        { "GREEK8", RTL_TEXTENCODING_ISO_8859_7 },
        { "CSISOLATINGREEK", RTL_TEXTENCODING_ISO_8859_7 },
        { "ISO-8859-8", RTL_TEXTENCODING_ISO_8859_8 },
        { "ISO_8859-8:1988", RTL_TEXTENCODING_ISO_8859_8 },
        { "ISO-IR-138", RTL_TEXTENCODING_ISO_8859_8 },
        { "ISO_8859-8", RTL_TEXTENCODING_ISO_8859_8 },
        { "HEBREW", RTL_TEXTENCODING_ISO_8859_8 },
        { "CSISOLATINHEBREW", RTL_TEXTENCODING_ISO_8859_8 },
        { "ISO-8859-9", RTL_TEXTENCODING_ISO_8859_9 },
        { "ISO_8859-9:1989", RTL_TEXTENCODING_ISO_8859_9 },
        { "ISO-IR-148", RTL_TEXTENCODING_ISO_8859_9 },
        { "ISO_8859-9", RTL_TEXTENCODING_ISO_8859_9 },
        { "LATIN5", RTL_TEXTENCODING_ISO_8859_9 },
        { "L5", RTL_TEXTENCODING_ISO_8859_9 },
        { "CSISOLATIN5", RTL_TEXTENCODING_ISO_8859_9 },
        { "ISO-8859-14", RTL_TEXTENCODING_ISO_8859_14 }, // RFC 2047
        { "ISO_8859-15", RTL_TEXTENCODING_ISO_8859_15 },
        { "ISO-8859-15", RTL_TEXTENCODING_ISO_8859_15 }, // RFC 2047
        { "MACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
        { "MAC", RTL_TEXTENCODING_APPLE_ROMAN },
        { "CSMACINTOSH", RTL_TEXTENCODING_APPLE_ROMAN },
        { "IBM437", RTL_TEXTENCODING_IBM_437 },
        { "CP437", RTL_TEXTENCODING_IBM_437 },
        { "437", RTL_TEXTENCODING_IBM_437 },
        { "CSPC8CODEPAGE437", RTL_TEXTENCODING_IBM_437 },
        { "IBM850", RTL_TEXTENCODING_IBM_850 },
        { "CP850", RTL_TEXTENCODING_IBM_850 },
        { "850", RTL_TEXTENCODING_IBM_850 },
        { "CSPC850MULTILINGUAL", RTL_TEXTENCODING_IBM_850 },
        { "IBM860", RTL_TEXTENCODING_IBM_860 },
        { "CP860", RTL_TEXTENCODING_IBM_860 },
        { "860", RTL_TEXTENCODING_IBM_860 },
        { "CSIBM860", RTL_TEXTENCODING_IBM_860 },
        { "IBM861", RTL_TEXTENCODING_IBM_861 },
        { "CP861", RTL_TEXTENCODING_IBM_861 },
        { "861", RTL_TEXTENCODING_IBM_861 },
        { "CP-IS", RTL_TEXTENCODING_IBM_861 },
        { "CSIBM861", RTL_TEXTENCODING_IBM_861 },
        { "IBM863", RTL_TEXTENCODING_IBM_863 },
        { "CP863", RTL_TEXTENCODING_IBM_863 },
        { "863", RTL_TEXTENCODING_IBM_863 },
        { "CSIBM863", RTL_TEXTENCODING_IBM_863 },
        { "IBM865", RTL_TEXTENCODING_IBM_865 },
        { "CP865", RTL_TEXTENCODING_IBM_865 },
        { "865", RTL_TEXTENCODING_IBM_865 },
        { "CSIBM865", RTL_TEXTENCODING_IBM_865 },
        { "IBM775", RTL_TEXTENCODING_IBM_775 },
        { "CP775", RTL_TEXTENCODING_IBM_775 },
        { "CSPC775BALTIC", RTL_TEXTENCODING_IBM_775 },
        { "IBM852", RTL_TEXTENCODING_IBM_852 },
        { "CP852", RTL_TEXTENCODING_IBM_852 },
        { "852", RTL_TEXTENCODING_IBM_852 },
        { "CSPCP852", RTL_TEXTENCODING_IBM_852 },
        { "IBM855", RTL_TEXTENCODING_IBM_855 },
        { "CP855", RTL_TEXTENCODING_IBM_855 },
        { "855", RTL_TEXTENCODING_IBM_855 },
        { "CSIBM855", RTL_TEXTENCODING_IBM_855 },
        { "IBM857", RTL_TEXTENCODING_IBM_857 },
        { "CP857", RTL_TEXTENCODING_IBM_857 },
        { "857", RTL_TEXTENCODING_IBM_857 },
        { "CSIBM857", RTL_TEXTENCODING_IBM_857 },
        { "IBM862", RTL_TEXTENCODING_IBM_862 },
        { "CP862", RTL_TEXTENCODING_IBM_862 },
        { "862", RTL_TEXTENCODING_IBM_862 },
        { "CSPC862LATINHEBREW", RTL_TEXTENCODING_IBM_862 },
        { "IBM864", RTL_TEXTENCODING_IBM_864 },
        { "CP864", RTL_TEXTENCODING_IBM_864 },
        { "CSIBM864", RTL_TEXTENCODING_IBM_864 },
        { "IBM866", RTL_TEXTENCODING_IBM_866 },
        { "CP866", RTL_TEXTENCODING_IBM_866 },
        { "866", RTL_TEXTENCODING_IBM_866 },
        { "CSIBM866", RTL_TEXTENCODING_IBM_866 },
        { "IBM869", RTL_TEXTENCODING_IBM_869 },
        { "CP869", RTL_TEXTENCODING_IBM_869 },
        { "869", RTL_TEXTENCODING_IBM_869 },
        { "CP-GR", RTL_TEXTENCODING_IBM_869 },
        { "CSIBM869", RTL_TEXTENCODING_IBM_869 },
        { "WINDOWS-1250", RTL_TEXTENCODING_MS_1250 },
        { "WINDOWS-1251", RTL_TEXTENCODING_MS_1251 },
        { "WINDOWS-1253", RTL_TEXTENCODING_MS_1253 },
        { "WINDOWS-1254", RTL_TEXTENCODING_MS_1254 },
        { "WINDOWS-1255", RTL_TEXTENCODING_MS_1255 },
        { "WINDOWS-1256", RTL_TEXTENCODING_MS_1256 },
        { "WINDOWS-1257", RTL_TEXTENCODING_MS_1257 },
        { "WINDOWS-1258", RTL_TEXTENCODING_MS_1258 },
        { "SHIFT_JIS", RTL_TEXTENCODING_SHIFT_JIS },
        { "MS_KANJI", RTL_TEXTENCODING_SHIFT_JIS },
        { "CSSHIFTJIS", RTL_TEXTENCODING_SHIFT_JIS },
        { "GB2312", RTL_TEXTENCODING_GB_2312 },
        { "CSGB2312", RTL_TEXTENCODING_GB_2312 },
        { "BIG5", RTL_TEXTENCODING_BIG5 },
        { "CSBIG5", RTL_TEXTENCODING_BIG5 },
        { "EUC-JP", RTL_TEXTENCODING_EUC_JP },
        { "EXTENDED_UNIX_CODE_PACKED_FORMAT_FOR_JAPANESE",
          RTL_TEXTENCODING_EUC_JP },
        { "CSEUCPKDFMTJAPANESE", RTL_TEXTENCODING_EUC_JP },
        { "ISO-2022-JP", RTL_TEXTENCODING_ISO_2022_JP },
        { "CSISO2022JP", RTL_TEXTENCODING_ISO_2022_JP },
        { "ISO-2022-CN", RTL_TEXTENCODING_ISO_2022_CN },
        { "KOI8-R", RTL_TEXTENCODING_KOI8_R },
        { "CSKOI8R", RTL_TEXTENCODING_KOI8_R },
        { "UTF-7", RTL_TEXTENCODING_UTF7 },
        { "UTF-8", RTL_TEXTENCODING_UTF8 },
        { "ISO-8859-10", RTL_TEXTENCODING_ISO_8859_10 }, // RFC 2047
        { "ISO-8859-13", RTL_TEXTENCODING_ISO_8859_13 }, // RFC 2047
        { "EUC-KR", RTL_TEXTENCODING_EUC_KR },
        { "CSEUCKR", RTL_TEXTENCODING_EUC_KR },
        { "ISO-2022-KR", RTL_TEXTENCODING_ISO_2022_KR },
        { "CSISO2022KR", RTL_TEXTENCODING_ISO_2022_KR },
        { "ISO-10646-UCS-4", RTL_TEXTENCODING_UCS4 },
        { "CSUCS4", RTL_TEXTENCODING_UCS4 },
        { "ISO-10646-UCS-2", RTL_TEXTENCODING_UCS2 },
        { "CSUNICODE", RTL_TEXTENCODING_UCS2 } };

/** Map a charset name to a text encoding using aEncodingMap.

    @param pBegin  Start of the charset name.

    @param pEnd  One past the last character of the charset name.

    @return  The encoding of the first table entry whose name equals the
    given range (ignoring US-ASCII case), or RTL_TEXTENCODING_DONTKNOW if no
    entry matches.
 */
rtl_TextEncoding getCharsetEncoding(sal_Char const * pBegin,
                                              sal_Char const * pEnd)
{
    rtl_TextEncoding eFound = RTL_TEXTENCODING_DONTKNOW;
    for (EncodingEntry const & rEntry : aEncodingMap)
    {
        if (!equalIgnoreCase(pBegin, pEnd, rEntry.m_aName))
            continue;
        // First match wins; later aliases are never consulted.
        eFound = rEntry.m_eEncoding;
        break;
    }
    return eFound;
}

}

//  INetMIME

/** Check whether a character belongs to the "atom" character set.

    @param nChar  Some UCS-4 character.

    @return  True for every US-ASCII character in the range 0x21--0x7E
    except '"', '(', ')', ',', '.', ':', ';', '<', '>', '@', '[', '\' and
    ']'; false for everything else (controls, space, DEL, non US-ASCII).
 */
// static
bool INetMIME::isAtomChar(sal_uInt32 nChar)
{
    // Controls (0x00--0x1F), space (0x20), DEL (0x7F) and anything beyond
    // US-ASCII are never atom characters.
    if (!rtl::isAscii(nChar) || nChar <= 0x20 || nChar == 0x7F)
        return false;

    // Of the remaining visible characters only these are excluded.
    switch (nChar)
    {
        case '"':
        case '(':
        case ')':
        case ',':
        case '.':
        case ':':
        case ';':
        case '<':
        case '>':
        case '@':
        case '[':
        case '\\':
        case ']':
            return false;

        default:
            return true;
    }
}

/** Check whether a character belongs to the IMAP "atom" character set.

    @param nChar  Some UCS-4 character.

    @return  True for every US-ASCII character in the range 0x21--0x7E
    except '"', '%', '(', ')', '*', '\' and '{'; false for everything else
    (controls, space, DEL, non US-ASCII).
 */
// static
bool INetMIME::isIMAPAtomChar(sal_uInt32 nChar)
{
    // Controls (0x00--0x1F), space (0x20), DEL (0x7F) and anything beyond
    // US-ASCII are never atom characters.
    if (!rtl::isAscii(nChar) || nChar <= 0x20 || nChar == 0x7F)
        return false;

    // Of the remaining visible characters only these are excluded.
    switch (nChar)
    {
        case '"':
        case '%':
        case '(':
        case ')':
        case '*':
        case '\\':
        case '{':
            return false;

        default:
            return true;
    }
}

/** Compare a UTF-16 character range with a NUL-terminated byte string,
    treating US-ASCII letters of different case as equal.

    @param pBegin1  Start of the first sequence; must not be null.

    @param pEnd1  One past the last character of the first sequence; must not
    precede pBegin1.

    @param pString2  The NUL-terminated second string; must not be null.

    @return  True if both sequences have the same length and every pair of
    corresponding characters is equal after rtl::toAsciiUpperCase().
 */
// static
bool INetMIME::equalIgnoreCase(const sal_Unicode * pBegin1,
                               const sal_Unicode * pEnd1,
                               const sal_Char * pString2)
{
    DBG_ASSERT(pBegin1 && pBegin1 <= pEnd1 && pString2,
               "INetMIME::equalIgnoreCase(): Bad sequences");

    for (; *pString2 != 0; ++pBegin1, ++pString2)
    {
        // The range ran out before the string did.
        if (pBegin1 == pEnd1)
            return false;
        if (rtl::toAsciiUpperCase(*pBegin1)
            != rtl::toAsciiUpperCase(*pString2))
        {
            return false;
        }
    }
    // The string is exhausted; the range must be exhausted as well.
    return pBegin1 == pEnd1;
}

// static
bool INetMIME::scanUnsigned(const sal_Unicode *& rBegin,
                            const sal_Unicode * pEnd, bool bLeadingZeroes,
                            sal_uInt32 & rValue)
{
    sal_uInt64 nTheValue = 0;
    const sal_Unicode * p = rBegin;
    for ( ; p != pEnd; ++p)
    {
        int nWeight = getWeight(*p);
        if (nWeight < 0)
            break;
        nTheValue = 10 * nTheValue + nWeight;
        if (nTheValue > std::numeric_limits< sal_uInt32 >::max())
            return false;
    }
    if (nTheValue == 0 && (p == rBegin || (!bLeadingZeroes && p - rBegin != 1)))
        return false;
    rBegin = p;
    rValue = sal_uInt32(nTheValue);
    return true;
}

/** Scan a content type of the form "type/subtype" followed by parameters.

    Linear white space and comments (as skipped by
    skipLinearWhiteSpaceComment()) are allowed before the type, around the
    '/' and before the subtype.

    @param pBegin  Start of the text to scan.

    @param pEnd  One past the last character of the text to scan.

    @param pType  If not null, receives the type, converted to US-ASCII
    lower case.  Only written when both type and subtype were scanned.

    @param pSubType  If not null, receives the subtype, converted to
    US-ASCII lower case.  Only written when both type and subtype were
    scanned.

    @param pParameters  Passed on unchanged to scanParameters().

    @return  Null if the type, the '/' or the subtype is missing; otherwise
    whatever scanParameters() returns for the text following the subtype.
 */
// static
sal_Unicode const * INetMIME::scanContentType(
    sal_Unicode const * pBegin, sal_Unicode const * pEnd, OUString * pType,
    OUString * pSubType, INetContentTypeParameterList * pParameters)
{
    // Advance over a (possibly empty) run of token characters.
    auto const skipToken = [pEnd](sal_Unicode const * pPos)
    {
        while (pPos != pEnd && isTokenChar(*pPos))
            ++pPos;
        return pPos;
    };

    sal_Unicode const * const pTypeBegin
        = skipLinearWhiteSpaceComment(pBegin, pEnd);
    sal_Unicode const * const pTypeEnd = skipToken(pTypeBegin);
    if (pTypeEnd == pTypeBegin)
        return nullptr;

    sal_Unicode const * const pSlash
        = skipLinearWhiteSpaceComment(pTypeEnd, pEnd);
    if (pSlash == pEnd || *pSlash != '/')
        return nullptr;

    sal_Unicode const * const pSubTypeBegin
        = skipLinearWhiteSpaceComment(pSlash + 1, pEnd);
    sal_Unicode const * const pSubTypeEnd = skipToken(pSubTypeBegin);
    if (pSubTypeEnd == pSubTypeBegin)
        return nullptr;

    // Both parts are present; only now touch the output parameters.
    if (pType != nullptr)
        *pType = OUString(pTypeBegin, pTypeEnd - pTypeBegin)
            .toAsciiLowerCase();
    if (pSubType != nullptr)
        *pSubType = OUString(pSubTypeBegin, pSubTypeEnd - pSubTypeBegin)
            .toAsciiLowerCase();

    return scanParameters(pSubTypeEnd, pEnd, pParameters);
}

/** Decode the encoded words ("=?charset?B?...?=" and "=?charset?Q?...?=")
    contained in a header field body.

    Every well-formed encoded word whose charset is known is replaced by its
    decoded text; white space between two adjacent encoded words is dropped.
    Anything that does not form a valid encoded word is copied through
    unchanged (see the notes at the start of the function body for how
    non US-ASCII octets are treated).

    @param rBody  The raw header field body.

    @return  The decoded header field body.
 */
// static
OUString INetMIME::decodeHeaderFieldBody(const OString& rBody)
{
    // Due to a bug in INetCoreRFC822MessageStream::ConvertTo7Bit(), old
    // versions of StarOffice send mails with header fields where encoded
    // words can be preceded by '=', ',', '.', '"', or '(', and followed by
    // '=', ',', '.', '"', ')', without any required white space in between.
    // And there appear to exist some broken mailers that only encode single
    // letters within words, like "Appel
    // =?iso-8859-1?Q?=E0?=t=?iso-8859-1?Q?=E9?=moin", so it seems best to
    // detect encoded words even when not properly surrounded by white space.

    // Non US-ASCII characters in rBody are treated as ISO-8859-1, except
    // for sequences that translateUTF8Char() accepts (see below).

    // encoded-word = "=?"
    //     1*(%x21 / %x23-27 / %x2A-2B / %x2D / %x30-39 / %x41-5A / %x5E-7E)
    //     ["*" 1*8ALPHA *("-" 1*8ALPHA)] "?"
    //     ("B?" *(4base64) (4base64 / 3base64 "=" / 2base64 "==")
    //      / "Q?" 1*(%x21-3C / %x3E / %x40-7E / "=" 2HEXDIG))
    //     "?="

    // base64 = ALPHA / DIGIT / "+" / "/"

    const sal_Char * pBegin = rBody.getStr();
    const sal_Char * pEnd = pBegin + rBody.getLength();

    OUString sDecoded;
    // Start of the input that has not yet been appended to sDecoded.
    const sal_Char * pCopyBegin = pBegin;

    /* bool bStartEncodedWord = true; */
    // Start of the white space run directly preceding p; this white space
    // is dropped if an encoded word starts at p.
    const sal_Char * pWSPBegin = pBegin;

    for (const sal_Char * p = pBegin; p != pEnd;)
    {
        if (*p == '=' /* && bStartEncodedWord */)
        {
            // q scans ahead speculatively; p is only moved to q once the
            // complete encoded word has been validated and converted.
            const sal_Char * q = p + 1;
            bool bEncodedWord = q != pEnd && *q++ == '?';

            // Scan "charset[*language]?" and look up the charset.
            rtl_TextEncoding eCharsetEncoding = RTL_TEXTENCODING_DONTKNOW;
            if (bEncodedWord)
            {
                const sal_Char * pCharsetBegin = q;
                const sal_Char * pLanguageBegin = nullptr;
                int nAlphaCount = 0;
                for (bool bDone = false; !bDone;)
                    if (q == pEnd)
                    {
                        bEncodedWord = false;
                        bDone = true;
                    }
                    else
                    {
                        sal_Char cChar = *q++;
                        switch (cChar)
                        {
                            case '*':
                                pLanguageBegin = q - 1;
                                nAlphaCount = 0;
                                break;

                            case '-':
                                if (pLanguageBegin != nullptr)
                                {
                                    if (nAlphaCount == 0)
                                        pLanguageBegin = nullptr;
                                    else
                                        nAlphaCount = 0;
                                }
                                break;

                            case '?':
                                if (pCharsetBegin == q - 1)
                                    bEncodedWord = false;
                                else
                                {
                                    // The charset name ends at the language
                                    // suffix if a well-formed one was seen,
                                    // else at the '?'.
                                    eCharsetEncoding
                                        = getCharsetEncoding(
                                              pCharsetBegin,
                                              pLanguageBegin == nullptr
                                              || nAlphaCount == 0 ?
                                                  q - 1 : pLanguageBegin);
                                    bEncodedWord = isMIMECharsetEncoding(
                                                       eCharsetEncoding);
                                    eCharsetEncoding
                                        = translateFromMIME(eCharsetEncoding);
                                }
                                bDone = true;
                                break;

                            default:
                                if (pLanguageBegin != nullptr
                                    && (!rtl::isAsciiAlpha(cChar) || ++nAlphaCount > 8))
                                    pLanguageBegin = nullptr;
                                break;
                        }
                    }
            }

            // Scan the encoding letter ("B" or "Q", either case).
            bool bEncodingB = false;
            if (bEncodedWord)
            {
                if (q == pEnd)
                    bEncodedWord = false;
                else
                {
                    switch (*q++)
                    {
                        case 'B':
                        case 'b':
                            bEncodingB = true;
                            break;

                        case 'Q':
                        case 'q':
                            bEncodingB = false;
                            break;

                        default:
                            bEncodedWord = false;
                            break;
                    }
                }
            }

            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '?';

            // Decode the encoded text into raw octets (sText), up to and
            // including the terminating '?'.
            OStringBuffer sText;
            if (bEncodedWord)
            {
                if (bEncodingB)
                {
                    for (bool bDone = false; !bDone;)
                    {
                        if (pEnd - q < 4)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            // Decode one quantum of four Base 64 characters
                            // into up to three octets.
                            bool bFinal = false;
                            int nCount = 3;
                            sal_uInt32 nValue = 0;
                            for (int nShift = 18; nShift >= 0; nShift -= 6)
                            {
                                int nWeight = getBase64Weight(*q++);
                                if (nWeight == -2)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                if (nWeight == -1)
                                {
                                    if (!bFinal)
                                    {
                                        // Padding is only allowed in the
                                        // third and fourth position.
                                        if (nShift >= 12)
                                        {
                                            bEncodedWord = false;
                                            bDone = true;
                                            break;
                                        }
                                        bFinal = true;
                                        nCount = nShift == 6 ? 1 : 2;
                                    }
                                }
                                else if (bFinal)
                                {
                                    // A Base 64 digit after padding (as in
                                    // "QQ=Q") does not match the grammar.
                                    bEncodedWord = false;
                                    bDone = true;
                                    break;
                                }
                                else
                                    nValue |= nWeight << nShift;
                            }
                            if (bEncodedWord)
                            {
                                for (int nShift = 16; nCount-- > 0; nShift -= 8)
                                    sText.append(sal_Char(nValue >> nShift & 0xFF));
                                // Do not look at *q without knowing that q
                                // is still inside the input.
                                if (q != pEnd && *q == '?')
                                {
                                    ++q;
                                    bDone = true;
                                }
                                // A padded quantum must be the last one.
                                if (bFinal && !bDone)
                                {
                                    bEncodedWord = false;
                                    bDone = true;
                                }
                            }
                        }
                    }
                }
                else
                {
                    const sal_Char * pEncodedTextBegin = q;
                    // Start of the literal run not yet appended to sText.
                    const sal_Char * pEncodedTextCopyBegin = q;
                    for (bool bDone = false; !bDone;)
                        if (q == pEnd)
                        {
                            bEncodedWord = false;
                            bDone = true;
                        }
                        else
                        {
                            sal_uInt32 nChar = *q++;
                            switch (nChar)
                            {
                                case '=':
                                {
                                    // "=" must be followed by two hex digits.
                                    if (pEnd - q < 2)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    int nDigit1 = getHexWeight(q[0]);
                                    int nDigit2 = getHexWeight(q[1]);
                                    if (nDigit1 < 0 || nDigit2 < 0)
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                        break;
                                    }
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(sal_Char(nDigit1 << 4 | nDigit2));
                                    q += 2;
                                    pEncodedTextCopyBegin = q;
                                    break;
                                }

                                case '?':
                                    // End of the encoded text, which must
                                    // not be empty.
                                    if (q - pEncodedTextBegin > 1)
                                        sText.append(rBody.copy(
                                            (pEncodedTextCopyBegin - pBegin),
                                            (q - 1 - pEncodedTextCopyBegin)));
                                    else
                                        bEncodedWord = false;
                                    bDone = true;
                                    break;

                                case '_':
                                    // "_" stands for a space.
                                    sText.append(rBody.copy(
                                        (pEncodedTextCopyBegin - pBegin),
                                        (q - 1 - pEncodedTextCopyBegin)));
                                    sText.append(' ');
                                    pEncodedTextCopyBegin = q;
                                    break;

                                default:
                                    if (!isVisible(nChar))
                                    {
                                        bEncodedWord = false;
                                        bDone = true;
                                    }
                                    break;
                            }
                        }
                }
            }

            bEncodedWord = bEncodedWord && q != pEnd && *q++ == '=';

            // Convert the decoded octets from the word's charset.  The
            // buffer is owned by a smart pointer so that it is released on
            // every path, including an exception thrown while appending.
            std::unique_ptr<sal_Unicode[]> pUnicodeBuffer;
            sal_Size nUnicodeSize = 0;
            if (bEncodedWord)
            {
                pUnicodeBuffer.reset(
                    convertToUnicode(sText.getStr(),
                                     sText.getStr() + sText.getLength(),
                                     eCharsetEncoding, nUnicodeSize));
                if (!pUnicodeBuffer)
                    bEncodedWord = false;
            }

            if (bEncodedWord)
            {
                // Flush the pending literal text, minus the white space
                // directly in front of the encoded word.
                appendISO88591(sDecoded, pCopyBegin, pWSPBegin);
                sDecoded += OUString(
                    pUnicodeBuffer.get(),
                    static_cast< sal_Int32 >(nUnicodeSize));
                p = q;
                pCopyBegin = p;

                pWSPBegin = p;
                while (p != pEnd && isWhiteSpace(*p))
                    ++p;
                /* bStartEncodedWord = p != pWSPBegin; */
                continue;
            }
        }

        // Not (the start of) a valid encoded word: consume one character
        // literally.  p != pEnd is guaranteed here by the loop condition.
        switch (*p++)
        {
            case '"':
                /* bStartEncodedWord = true; */
                break;

            case '(':
                /* bStartEncodedWord = true; */
                break;

            case ')':
                /* bStartEncodedWord = false; */
                break;

            default:
            {
                // If translateUTF8Char() accepts the octets starting at this
                // character, append the resulting character instead of
                // copying the octets as ISO-8859-1.
                const sal_Char * pUTF8Begin = p - 1;
                const sal_Char * pUTF8End = pUTF8Begin;
                sal_uInt32 nCharacter = 0;
                if (translateUTF8Char(pUTF8End, pEnd, RTL_TEXTENCODING_UCS4,
                                      nCharacter))
                {
                    appendISO88591(sDecoded, pCopyBegin, p - 1);
                    sal_Unicode aUTF16Buf[2];
                    sal_Int32 nUTF16Len = putUTF32Character(aUTF16Buf, nCharacter) - aUTF16Buf;
                    sDecoded += OUString(aUTF16Buf, nUTF16Len);
                    p = pUTF8End;
                    pCopyBegin = p;
                }
                /* bStartEncodedWord = false; */
                break;
            }
        }
        pWSPBegin = p;
    }

    appendISO88591(sDecoded, pCopyBegin, pEnd);
    return sDecoded;
}

void INetMIMEOutputSink::writeSequence(const sal_Char * pBegin,
                                       const sal_Char * pEnd)
{
    OSL_ENSURE(pBegin && pBegin <= pEnd,
               "INetMIMEOutputSink::writeSequence(): Bad sequence");

    m_aBuffer.append(pBegin, pEnd - pBegin);
}

void INetMIMEOutputSink::writeSequence(const sal_Char * pSequence)
{
    sal_Size nLength = rtl_str_getLength(pSequence);
    writeSequence(pSequence, pSequence + nLength);
}

void INetMIMEOutputSink::writeSequence(const sal_Unicode * pBegin,
                                       const sal_Unicode * pEnd)
{
    assert(pBegin && pBegin <= pEnd &&
               "INetMIMEOutputSink::writeSequence(): Bad sequence");

    std::unique_ptr<sal_Char[]> pBufferBegin( new sal_Char[pEnd - pBegin] );
    sal_Char * pBufferEnd = pBufferBegin.get();
    while (pBegin != pEnd)
    {
        DBG_ASSERT(*pBegin < 256,
                   "INetMIMEOutputSink::writeSequence(): Bad octet");
        *pBufferEnd++ = sal_Char(*pBegin++);
    }
    writeSequence(pBufferBegin.get(), pBufferEnd);
}

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */