office-gobmx/sal/textenc/tcvtutf7.cxx
Stephan Bergmann 7c704c78d3 Removed some unused parameters; added SAL_UNUSED_PARAMETER.
SAL_UNUSED_PARAMETER (expanding to __attribute__ ((unused)) for GCC)
is used to annotate legitimately unused parameters, so that static
analysis tools can tell legitimately unused parameters from truly
unnecessary ones.  To that end, some patches for external modules
are also added, that are only applied when compiling with GCC and
add necessary __attribute__ ((unused)) in headers.
2012-01-21 15:21:16 +01:00

583 lines
21 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*************************************************************************
*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* Copyright 2000, 2010 Oracle and/or its affiliates.
*
* OpenOffice.org - a multi-platform office productivity suite
*
* This file is part of OpenOffice.org.
*
* OpenOffice.org is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License version 3
* only, as published by the Free Software Foundation.
*
* OpenOffice.org is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License version 3 for more details
* (a copy is included in the LICENSE file that accompanied this code).
*
* You should have received a copy of the GNU Lesser General Public License
* version 3 along with OpenOffice.org. If not, see
* <http://www.openoffice.org/license.html>
* for a copy of the LGPLv3 License.
*
************************************************************************/
#include "sal/config.h"
#include "rtl/textcvt.h"
#include "tenchelp.hxx"
#include "unichars.hxx"
/* ======================================================================= */
/* Base64 alphabet: maps a 6-bit value (0-63) to the ASCII code of the */
/* character that represents it. ImplUnicodeToUTF7 indexes this table with */
/* the top six bits of its bit buffer. */
static sal_uChar const aImplBase64Tab[64] =
{
/* A-Z (values 0-25) */
0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
0x58, 0x59, 0x5A,
/* a-z (values 26-51) */
0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
0x78, 0x79, 0x7A,
/* 0-9 (values 52-61), + (62), / (63) */
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
0x38, 0x39, 0x2B, 0x2F
};
/* Inverse of aImplBase64Tab: maps an ASCII code (0x00-0x7F) to its index in */
/* aImplBase64Tab, or to 0xFF when the character is not a base64 character. */
/* The table has only 128 entries, so callers must check c <= 0x7F first. */
static sal_uChar const aImplBase64IndexTab[128] =
{
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x00-0x07 */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x08-0x0F */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x10-0x17 */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x18-0x1F */
0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x20-0x27 !"#$%&' */
0xFF, 0xFF, 0xFF, 62, 0xFF, 0xFF, 0xFF, 63, /* 0x28-0x2F ()*+,-./ */
52, 53, 54, 55, 56, 57, 58, 59, /* 0x30-0x37 01234567 */
60, 61, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x38-0x3F 89:;<=>? */
0xFF, 0, 1, 2, 3, 4, 5, 6, /* 0x40-0x47 @ABCDEFG */
7, 8, 9, 10, 11, 12, 13, 14, /* 0x48-0x4F HIJKLMNO */
15, 16, 17, 18, 19, 20, 21, 22, /* 0x50-0x57 PQRSTUVW */
23, 24, 25, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, /* 0x58-0x5F XYZ[\]^_ */
0xFF, 26, 27, 28, 29, 30, 31, 32, /* 0x60-0x67 `abcdefg */
33, 34, 35, 36, 37, 38, 39, 40, /* 0x68-0x6F hijklmno */
41, 42, 43, 44, 45, 46, 47, 48, /* 0x70-0x77 pqrstuvw */
49, 50, 51, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF /* 0x78-0x7F xyz{|}~ */
};
/* For each ASCII code (0x00-0x7F): 1 when ImplUnicodeToUTF7 must write the */
/* character inside a base64 shift sequence, 0 when it may be written */
/* directly. Code units above 0x7F are always shifted and are not looked up */
/* here. */
/* Among the control characters only HTAB (0x09), LF (0x0A) and CR (0x0D) */
/* are direct, as RFC 2152 rule 3 permits; form feed (0x0C) must be shifted. */
/* NOTE(review): '-' (0x2D) is marked as must-shift although RFC 2152 lists */
/* it among the directly encodable characters; shifting it is still valid */
/* UTF-7, so the entry is left as it was. */
static sal_uChar const aImplMustShiftTab[128] =
{
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x00-0x07 */
    1, 0, 0, 1, 1, 0, 1, 1,     /* 0x08-0x0F 0x09 == HTAB, 0x0A == LF, 0x0D == CR */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x10-0x17 */
    1, 1, 1, 1, 1, 1, 1, 1,     /* 0x18-0x1F */
    0, 1, 1, 1, 1, 1, 1, 0,     /* 0x20-0x27  !"#$%&' */
    0, 0, 1, 1, 0, 1, 0, 0,     /* 0x28-0x2F ()*+,-./ */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x30-0x37 01234567 */
    0, 0, 0, 1, 1, 1, 1, 0,     /* 0x38-0x3F 89:;<=>? */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x40-0x47 @ABCDEFG */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x48-0x4F HIJKLMNO */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x50-0x57 PQRSTUVW */
    0, 0, 0, 1, 1, 1, 1, 1,     /* 0x58-0x5F XYZ[\]^_ */
    1, 0, 0, 0, 0, 0, 0, 0,     /* 0x60-0x67 `abcdefg */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x68-0x6F hijklmno */
    0, 0, 0, 0, 0, 0, 0, 0,     /* 0x70-0x77 pqrstuvw */
    0, 0, 0, 1, 1, 1, 1, 1      /* 0x78-0x7F xyz{|}~ */
};
/* + */
#define IMPL_SHIFT_IN_CHAR 0x2B
/* - */
#define IMPL_SHIFT_OUT_CHAR 0x2D
/* ----------------------------------------------------------------------- */
/* State of the UTF-7 -> Unicode decoder. ImplUTF7ToUnicode stores its final */
/* state here when it returns; the code that would read it back at the start */
/* of a call is commented out, so every call currently starts from scratch. */
struct ImplUTF7ToUCContextData
{
/* non-zero while inside a base64 shift sequence (after '+') */
int mbShifted;
/* non-zero directly after the shift-in character, until the first base64 */
/* character has been added to the bit buffer */
int mbFirst;
/* non-zero once the current shift sequence has produced a character */
int mbWroteOne;
/* collected base64 bits, filled from the most significant bit downwards */
sal_uInt32 mnBitBuffer;
/* number of valid bits in mnBitBuffer */
sal_uInt32 mnBufferBits;
};
/* ----------------------------------------------------------------------- */
void* ImplUTF7CreateUTF7TextToUnicodeContext()
{
ImplUTF7ToUCContextData* pContextData = new ImplUTF7ToUCContextData;
pContextData->mbShifted = sal_False;
pContextData->mbFirst = sal_False;
pContextData->mbWroteOne = sal_False;
pContextData->mnBitBuffer = 0;
pContextData->mnBufferBits = 0;
return pContextData;
}
/* ----------------------------------------------------------------------- */
void ImplUTF7DestroyTextToUnicodeContext( void* pContext )
{
delete static_cast< ImplUTF7ToUCContextData * >(pContext);
}
/* ----------------------------------------------------------------------- */
void ImplUTF7ResetTextToUnicodeContext( void* pContext )
{
ImplUTF7ToUCContextData* pContextData = (ImplUTF7ToUCContextData*)pContext;
pContextData->mbShifted = sal_False;
pContextData->mbFirst = sal_False;
pContextData->mbWroteOne = sal_False;
pContextData->mnBitBuffer = 0;
pContextData->mnBufferBits = 0;
}
/* ----------------------------------------------------------------------- */
/*
 * Converts UTF-7 encoded bytes to 16-bit Unicode code units.
 *
 * Bytes outside a shift sequence are copied through unchanged. A '+' opens
 * a base64 shift sequence: every base64 character adds six bits to a bit
 * buffer, from which complete 16-bit units are written out. The first
 * character that is not a base64 character ends the sequence; "+-" produces
 * a literal '+'.
 *
 * Every call starts unshifted with an empty bit buffer. The final state is
 * stored in pContext, but resuming from a stored state is not implemented.
 *
 * pContext      optional ImplUTF7ToUCContextData that receives the final
 *               state; may be null
 * pSrcBuf       source bytes
 * nSrcBytes     number of source bytes
 * pDestBuf      destination buffer
 * nDestChars    capacity of pDestBuf in sal_Unicode units
 * nFlags        only the RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK bits are
 *               examined: ..._INVALID_ERROR stops the conversion,
 *               ..._INVALID_IGNORE skips the invalid input, any other value
 *               writes RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER
 * pInfo         out: set to 0 first, then RTL_TEXTTOUNICODE_INFO_* flags
 *               are ORed in
 * pSrcCvtBytes  out: number of source bytes that were consumed
 *
 * Returns the number of sal_Unicode units written to pDestBuf.
 */
sal_Size ImplUTF7ToUnicode( SAL_UNUSED_PARAMETER const void*, void* pContext,
                            const char* pSrcBuf, sal_Size nSrcBytes,
                            sal_Unicode* pDestBuf, sal_Size nDestChars,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtBytes )
{
    ImplUTF7ToUCContextData*    pContextData = (ImplUTF7ToUCContextData*)pContext;
    sal_uChar                   c = '\0';
    sal_uChar                   nBase64Value = 0;
    int                         bEnd = sal_False;
    int                         bBase64End;
    sal_uInt32                  nBitBufferTemp;
    sal_Unicode*                pEndDestBuf;
    const char*                 pEndSrcBuf;

    /* Resuming from the state stored in pContextData is not implemented, */
    /* so every call starts unshifted with an empty bit buffer. */
    int                         bShifted = sal_False;
    int                         bFirst = sal_False;
    int                         bWroteOne = sal_False;
    sal_uInt32                  nBitBuffer = 0;
    sal_uInt32                  nBufferBits = 0;

    *pInfo = 0;
    pEndDestBuf = pDestBuf+nDestChars;
    pEndSrcBuf  = pSrcBuf+nSrcBytes;

    do
    {
        if ( pSrcBuf < pEndSrcBuf )
        {
            c = (sal_uChar)*pSrcBuf;

            /* End, when not a base64 character */
            bBase64End = sal_False;
            if ( c <= 0x7F )
            {
                nBase64Value = aImplBase64IndexTab[c];
                if ( nBase64Value == 0xFF )
                    bBase64End = sal_True;
            }
            else
            {
                /* A byte above 0x7F is never a base64 character. It has */
                /* to end a shift sequence; otherwise the value looked up */
                /* for the previous character would be added to the bit */
                /* buffer a second time. */
                bBase64End = sal_True;
            }
        }
        else
        {
            bEnd = sal_True;
            bBase64End = sal_True;
        }

        if ( bShifted )
        {
            if ( bBase64End )
            {
                bShifted = sal_False;

                /* If the character causing us to drop out was SHIFT_IN */
                /* or SHIFT_OUT, it may be a special escape for SHIFT_IN. */
                /* The test for SHIFT_IN is not necessary, but allows */
                /* an alternate form of UTF-7 where SHIFT_IN is escaped */
                /* by SHIFT_IN. This only works for some values of */
                /* SHIFT_IN. It is implemented this way because it comes */
                /* from the official Unicode book (The Unicode Standard, */
                /* Version 2.0), so somebody may have used this feature. */
                if ( !bEnd )
                {
                    if ( (c == IMPL_SHIFT_IN_CHAR) || (c == IMPL_SHIFT_OUT_CHAR) )
                    {
                        /* If there was no base64 character and the */
                        /* terminating character of the shift sequence */
                        /* was the SHIFT_OUT_CHAR, then it is a special */
                        /* escape for SHIFT_IN_CHAR. */
                        if ( bFirst && (c == IMPL_SHIFT_OUT_CHAR) )
                        {
                            if ( pDestBuf >= pEndDestBuf )
                            {
                                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                                break;
                            }
                            *pDestBuf = IMPL_SHIFT_IN_CHAR;
                            pDestBuf++;
                            bWroteOne = sal_True;
                        }

                        /* Skip the terminating character and look at */
                        /* the one that follows it */
                        pSrcBuf++;
                        if ( pSrcBuf < pEndSrcBuf )
                            c = (sal_uChar)*pSrcBuf;
                        else
                            bEnd = sal_True;
                    }
                }

                /* An empty sequence is not allowed, so when not a single */
                /* valid character was written, the sequence is corrupt */
                if ( !bWroteOne )
                {
                    /* When there are no more bytes in the source buffer, */
                    /* the source buffer may simply be too small */
                    if ( bEnd )
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                    else
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        /* No replacement character is inserted here; an */
                        /* empty sequence is just ignored */
                    }
                }
            }
            else
            {
                /* Add 6 bits from the character to the bit buffer */
                nBufferBits += 6;
                nBitBuffer |= ((sal_uInt32)(nBase64Value & 0x3F)) << (32-nBufferBits);
                bFirst = sal_False;
            }

            /* Extract as many full 16 bit characters as possible from the */
            /* bit buffer. */
            while ( (pDestBuf < pEndDestBuf) && (nBufferBits >= 16) )
            {
                nBitBufferTemp = nBitBuffer >> (32-16);
                *pDestBuf = (sal_Unicode)((nBitBufferTemp) & 0xFFFF);
                pDestBuf++;
                nBitBuffer <<= 16;
                nBufferBits -= 16;
                bWroteOne = sal_True;
            }

            /* A complete character is still pending, so the loop above */
            /* stopped because the destination buffer is full */
            if ( nBufferBits >= 16 )
            {
                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                break;
            }

            if ( bBase64End )
            {
                /* The sequence ended while non-zero bits are left over, */
                /* so the sequence is corrupt */
                if ( nBufferBits && nBitBuffer )
                {
                    /* When there are no more bytes in the source buffer, */
                    /* the source buffer may simply be too small */
                    if ( bEnd )
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
                    else
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                        if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                            break;
                        }
                        else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                        {
                            if ( pDestBuf >= pEndDestBuf )
                            {
                                *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                                break;
                            }
                            *pDestBuf++
                                = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                        }
                    }
                }

                nBitBuffer = 0;
                nBufferBits = 0;
            }
        }

        if ( !bEnd )
        {
            if ( !bShifted )
            {
                if ( c == IMPL_SHIFT_IN_CHAR )
                {
                    bShifted    = sal_True;
                    bFirst      = sal_True;
                    bWroteOne   = sal_False;
                }
                else if ( c > 0x7F )
                {
                    /* Not a directly encoded character, so the input is */
                    /* corrupt. The byte itself is never written: it is */
                    /* reported as an error, skipped, or replaced. */
                    *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
                    if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
                        break;
                    }
                    else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) != RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
                    {
                        if ( pDestBuf >= pEndDestBuf )
                        {
                            *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                            break;
                        }
                        *pDestBuf++
                            = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
                    }
                }
                else
                {
                    /* Write the directly encoded character to the */
                    /* Unicode buffer */
                    if ( pDestBuf >= pEndDestBuf )
                    {
                        *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
                        break;
                    }
                    *pDestBuf = c;
                    pDestBuf++;
                }
            }

            pSrcBuf++;
        }
    }
    while ( !bEnd );

    /* Store the final state for the caller's context */
    if ( pContextData )
    {
        pContextData->mbShifted     = bShifted;
        pContextData->mbFirst       = bFirst;
        pContextData->mbWroteOne    = bWroteOne;
        pContextData->mnBitBuffer   = nBitBuffer;
        pContextData->mnBufferBits  = nBufferBits;
    }

    *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
    return (nDestChars - (pEndDestBuf-pDestBuf));
}
/* ======================================================================= */
/* State of the Unicode -> UTF-7 encoder. ImplUnicodeToUTF7 stores its final */
/* state here when it returns; the code that would read it back at the start */
/* of a call is commented out, so every call currently starts from scratch. */
struct ImplUTF7FromUCContextData
{
/* non-zero while a base64 shift sequence is open in the output */
int mbShifted;
/* bits waiting to be written as base64 characters, filled from the most */
/* significant bit downwards */
sal_uInt32 mnBitBuffer;
/* number of valid bits in mnBitBuffer */
sal_uInt32 mnBufferBits;
};
/* ----------------------------------------------------------------------- */
void* ImplUTF7CreateUnicodeToTextContext()
{
ImplUTF7FromUCContextData* pContextData = new ImplUTF7FromUCContextData;
pContextData->mbShifted = sal_False;
pContextData->mnBitBuffer = 0;
pContextData->mnBufferBits = 0;
return pContextData;
}
/* ----------------------------------------------------------------------- */
void ImplUTF7DestroyUnicodeToTextContext( void* pContext )
{
delete static_cast< ImplUTF7FromUCContextData * >(pContext);
}
/* ----------------------------------------------------------------------- */
void ImplUTF7ResetUnicodeToTextContext( void* pContext )
{
ImplUTF7FromUCContextData* pContextData = (ImplUTF7FromUCContextData*)pContext;
pContextData->mbShifted = sal_False;
pContextData->mnBitBuffer = 0;
pContextData->mnBufferBits = 0;
}
/* ----------------------------------------------------------------------- */
/*
 * Converts 16-bit Unicode code units to UTF-7.
 *
 * Code units that aImplMustShiftTab allows are written directly as one
 * byte. All others (everything above 0x7F included) are written inside a
 * base64 shift sequence that is opened with '+' and always closed with '-'.
 * A '+' in the source is written as "+-".
 *
 * Every call starts unshifted with an empty bit buffer. The final state is
 * stored in pContext, but resuming from a stored state is not implemented.
 * The flags parameter is not used.
 *
 * pContext      optional ImplUTF7FromUCContextData that receives the final
 *               state; may be null
 * pSrcBuf       source code units
 * nSrcChars     number of source code units
 * pDestBuf      destination buffer
 * nDestBytes    capacity of pDestBuf in bytes
 * pInfo         out: set to 0 first; RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL
 *               is ORed in when the output does not fit
 * pSrcCvtChars  out: number of source code units that were consumed
 *
 * Returns the number of bytes written to pDestBuf.
 */
sal_Size ImplUnicodeToUTF7( SAL_UNUSED_PARAMETER const void*, void* pContext,
                            const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                            char* pDestBuf, sal_Size nDestBytes,
                            SAL_UNUSED_PARAMETER sal_uInt32, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtChars )
{
    ImplUTF7FromUCContextData* const pState =
        static_cast< ImplUTF7FromUCContextData * >(pContext);
    const sal_Unicode* const pSrcLimit = pSrcBuf + nSrcChars;
    char* const pDestLimit = pDestBuf + nDestBytes;

    /* Resuming from the state stored in pState is not implemented, so */
    /* every call starts unshifted with an empty bit buffer. */
    int bInBase64 = sal_False;
    sal_uInt32 nPending = 0;        /* bits not yet written, left-aligned */
    sal_uInt32 nPendingBits = 0;    /* number of valid bits in nPending */

    *pInfo = 0;

    for (;;)
    {
        /* One extra pass runs after the last code unit so that an open */
        /* shift sequence gets flushed and closed. */
        const int bAtEnd = !(pSrcBuf < pSrcLimit);
        sal_Unicode cUnit = '\0';
        int bMustEncode = sal_False;

        if ( !bAtEnd )
        {
            cUnit = *pSrcBuf;
            bMustEncode = (cUnit > 0x7F) || aImplMustShiftTab[cUnit];

            /* Open a shift sequence when one is needed */
            if ( bMustEncode && !bInBase64 )
            {
                if ( pDestBuf >= pDestLimit )
                {
                    *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                    break;
                }
                *pDestBuf++ = IMPL_SHIFT_IN_CHAR;

                if ( cUnit != IMPL_SHIFT_IN_CHAR )
                    bInBase64 = sal_True;
                else
                {
                    /* A literal '+' is written as "+-" and does not */
                    /* enter shifted mode */
                    if ( pDestBuf >= pDestLimit )
                    {
                        *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                        break;
                    }
                    *pDestBuf++ = IMPL_SHIFT_OUT_CHAR;
                }
            }
        }

        if ( bInBase64 )
        {
            if ( bMustEncode )
            {
                /* Append the 16 bits of the code unit */
                nPendingBits += 16;
                nPending |= static_cast< sal_uInt32 >(cUnit) << (32 - nPendingBits);
            }
            else
            {
                /* The sequence ends: round the bit count up to a whole */
                /* base64 character (the padding bits are zero) */
                const sal_uInt32 nPartial = nPendingBits % 6;
                if ( nPartial != 0 )
                    nPendingBits += 6 - nPartial;
            }

            /* Write as many complete base64 characters as fit */
            while ( (nPendingBits >= 6) && (pDestBuf < pDestLimit) )
            {
                *pDestBuf++ = aImplBase64Tab[nPending >> (32 - 6)];
                nPending <<= 6;
                nPendingBits -= 6;
            }
            if ( nPendingBits >= 6 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            /* Close the sequence before a direct character or at the end */
            if ( !bMustEncode )
            {
                if ( pDestBuf >= pDestLimit )
                {
                    *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                    break;
                }
                *pDestBuf++ = IMPL_SHIFT_OUT_CHAR;
                bInBase64 = sal_False;
            }
        }

        if ( bAtEnd )
            break;

        /* The character can be encoded directly */
        if ( !bMustEncode )
        {
            if ( pDestBuf >= pDestLimit )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }
            *pDestBuf++ = static_cast< char >(static_cast< unsigned char >(cUnit));
        }

        ++pSrcBuf;
    }

    /* Store the final state for the caller's context */
    if ( pState )
    {
        pState->mbShifted = bInBase64;
        pState->mnBitBuffer = nPending;
        pState->mnBufferBits = nPendingBits;
    }

    *pSrcCvtChars = nSrcChars - (pSrcLimit - pSrcBuf);
    return (nDestBytes - (pDestLimit - pDestBuf));
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */