411e4d15ff
...which appears to have been broken when 1382473505
"#81346# - Fix for unconverted characters for DBCS encodings" moved that "if" out of surrounding "if" block. (And, for consistency, write the "if" check in the same way as the preceding one is written since 739cb04c36
"#97705# Fixed mapping of Big5 EUDC points.") Change-Id: I4324197c4eba671ab6313fb89f988da102b8ffa5 Reviewed-on: https://gerrit.libreoffice.org/78627 Tested-by: Jenkins Reviewed-by: Stephan Bergmann <sbergman@redhat.com>
682 lines
26 KiB
C++
682 lines
26 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#include <sal/config.h>
|
|
|
|
#include <rtl/textcvt.h>
|
|
|
|
#include "handleundefinedunicodetotextchar.hxx"
|
|
#include "tenchelp.hxx"
|
|
#include "unichars.hxx"
|
|
|
|
/* The DBCS to Unicode conversion routines use a lead table for the first  */
/* byte, from which we determine the trail table or, for single-byte       */
/* chars, the Unicode value. We have a separate table for every lead byte, */
/* because we can then share many tables between different charset         */
/* encodings.                                                              */
|
|
|
|
sal_Size ImplDBCSToUnicode( const void* pData, SAL_UNUSED_PARAMETER void*,
|
|
const char* pSrcBuf, sal_Size nSrcBytes,
|
|
sal_Unicode* pDestBuf, sal_Size nDestChars,
|
|
sal_uInt32 nFlags, sal_uInt32* pInfo,
|
|
sal_Size* pSrcCvtBytes )
|
|
{
|
|
unsigned char cTrail;
|
|
sal_Unicode cConv;
|
|
const ImplDBCSToUniLeadTab* pLeadEntry;
|
|
const ImplDBCSConvertData* pConvertData = static_cast<const ImplDBCSConvertData*>(pData);
|
|
const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
|
|
sal_Unicode* pEndDestBuf;
|
|
const char* pEndSrcBuf;
|
|
char const * startOfCurrentChar = pSrcBuf;
|
|
|
|
*pInfo = 0;
|
|
pEndDestBuf = pDestBuf+nDestChars;
|
|
pEndSrcBuf = pSrcBuf+nSrcBytes;
|
|
while ( pSrcBuf < pEndSrcBuf )
|
|
{
|
|
unsigned char cLead = static_cast<unsigned char>(*pSrcBuf);
|
|
|
|
/* get entry for the lead byte */
|
|
pLeadEntry = pLeadTab+cLead;
|
|
|
|
/* SingleByte char? */
|
|
if (pLeadEntry->mpToUniTrailTab == nullptr
|
|
|| cLead < pConvertData->mnLeadStart
|
|
|| cLead > pConvertData->mnLeadEnd)
|
|
{
|
|
cConv = pLeadEntry->mnUniChar;
|
|
if ( !cConv && (cLead != 0) )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
|
|
{
|
|
if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
|
|
++pSrcBuf;
|
|
} else {
|
|
pSrcBuf = startOfCurrentChar;
|
|
}
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
|
|
break;
|
|
}
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
|
|
{
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
continue;
|
|
}
|
|
cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* Source buffer too small */
|
|
if ( pSrcBuf +1 == pEndSrcBuf )
|
|
{
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0 )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
|
|
break;
|
|
}
|
|
cConv = 0;
|
|
}
|
|
else
|
|
{
|
|
pSrcBuf++;
|
|
cTrail = static_cast<unsigned char>(*pSrcBuf);
|
|
if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
|
|
cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
|
|
else
|
|
cConv = 0;
|
|
|
|
if ( !cConv )
|
|
{
|
|
/* EUDC Ranges */
|
|
sal_uInt16 i;
|
|
const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
|
|
for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
|
|
{
|
|
if ( (cLead >= pEUDCTab->mnLeadStart) &&
|
|
(cLead <= pEUDCTab->mnLeadEnd) )
|
|
{
|
|
if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
|
|
(cTrail <= pEUDCTab->mnTrail1End) )
|
|
{
|
|
cConv = pEUDCTab->mnUniStart+
|
|
((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
|
|
(cTrail-pEUDCTab->mnTrail1Start);
|
|
break;
|
|
}
|
|
sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
|
|
if ( (pEUDCTab->mnTrailCount >= 2) &&
|
|
(cTrail >= pEUDCTab->mnTrail2Start) &&
|
|
(cTrail <= pEUDCTab->mnTrail2End) )
|
|
{
|
|
cConv = pEUDCTab->mnUniStart+
|
|
((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
|
|
nTrailCount+
|
|
(cTrail-pEUDCTab->mnTrail2Start);
|
|
break;
|
|
}
|
|
nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1;
|
|
if ( (pEUDCTab->mnTrailCount >= 3) &&
|
|
(cTrail >= pEUDCTab->mnTrail3Start) &&
|
|
(cTrail <= pEUDCTab->mnTrail3End) )
|
|
{
|
|
cConv = pEUDCTab->mnUniStart+
|
|
((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
|
|
nTrailCount+
|
|
(cTrail-pEUDCTab->mnTrail3Start);
|
|
break;
|
|
}
|
|
}
|
|
|
|
pEUDCTab++;
|
|
}
|
|
|
|
if ( !cConv )
|
|
{
|
|
/* We compare the full range of the trail we defined, */
|
|
/* which can often be greater than the limit. We do this */
|
|
/* so that extensions that don't consider encodings */
|
|
/* correctly treat double-byte characters as a single */
|
|
/* character as much as possible. */
|
|
|
|
if (cLead < pConvertData->mnLeadStart
|
|
|| cLead > pConvertData->mnLeadEnd
|
|
|| cTrail < pConvertData->mnTrailStart
|
|
|| cTrail > pConvertData->mnTrailEnd)
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
|
|
{
|
|
if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
|
|
++pSrcBuf;
|
|
} else {
|
|
pSrcBuf = startOfCurrentChar;
|
|
}
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
|
|
break;
|
|
}
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
|
|
{
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
continue;
|
|
}
|
|
cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if ( !cConv )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
|
|
{
|
|
if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
|
|
++pSrcBuf;
|
|
} else {
|
|
pSrcBuf = startOfCurrentChar;
|
|
}
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
|
|
break;
|
|
}
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
|
|
{
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
continue;
|
|
}
|
|
cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
|
|
}
|
|
}
|
|
|
|
if ( pDestBuf == pEndDestBuf )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
|
|
break;
|
|
}
|
|
|
|
*pDestBuf = cConv;
|
|
pDestBuf++;
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
}
|
|
|
|
*pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
|
|
return (nDestChars - (pEndDestBuf-pDestBuf));
|
|
}
|
|
|
|
/* Convert Unicode text to a DBCS encoding.                                 */
/*                                                                          */
/* pData         ImplDBCSConvertData describing the encoding (high table,   */
/*               lead/trail limits, EUDC ranges)                            */
/* pSrcBuf       source code units, nSrcChars long                          */
/* pDestBuf      destination buffer with room for nDestBytes bytes          */
/* nFlags        RTL_UNICODETOTEXT_FLAGS_*; evaluated here only for         */
/*               PRIVATE_MAPTO0, otherwise passed on to                     */
/*               handleUndefinedUnicodeToTextChar                           */
/* pInfo         reset to 0, then receives RTL_UNICODETOTEXT_INFO_* bits    */
/* pSrcCvtChars  receives the number of source code units consumed          */
/*                                                                          */
/* Returns the number of bytes written to pDestBuf. A converted value       */
/* without high byte is written as one byte, anything else as two bytes     */
/* (lead byte first).                                                       */
sal_Size ImplUnicodeToDBCS( const void* pData, SAL_UNUSED_PARAMETER void*,
                            const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                            char* pDestBuf, sal_Size nDestBytes,
                            sal_uInt32 nFlags, sal_uInt32* pInfo,
                            sal_Size* pSrcCvtChars )
{
    const ImplDBCSConvertData* pConvertData = static_cast<const ImplDBCSConvertData*>(pData);
    const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
    char* pEndDestBuf = pDestBuf + nDestBytes;
    const sal_Unicode* pEndSrcBuf = pSrcBuf + nSrcChars;

    bool bCheckRange =
        pConvertData->mnLeadStart != 0 || pConvertData->mnLeadEnd != 0xFF;
        /* this statement has the effect that this extra check is only done for
           EUC-KR, which uses the MS-949 tables, but does not support the full
           range of MS-949 */

    *pInfo = 0;
    while ( pSrcBuf < pEndSrcBuf )
    {
        sal_Unicode c = *pSrcBuf;
        unsigned char nHighChar = static_cast<unsigned char>((c >> 8) & 0xFF);
        unsigned char nLowChar = static_cast<unsigned char>(c & 0xFF);
        sal_uInt16 cConv = 0;

        /* get entry for the high byte */
        const ImplUniToDBCSHighTab* pHighEntry = pHighTab + nHighChar;

        /* is low byte in the table range */
        if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
        {
            cConv = pHighEntry->mpToUniTrailTab[nLowChar - pHighEntry->mnLowStart];
            /* drop double-byte results outside the encoding's byte limits */
            if ( bCheckRange && cConv > 0x7F
                 && ((cConv >> 8) < pConvertData->mnLeadStart
                     || (cConv >> 8) > pConvertData->mnLeadEnd
                     || (cConv & 0xFF) < pConvertData->mnTrailStart
                     || (cConv & 0xFF) > pConvertData->mnTrailEnd) )
                cConv = 0;
        }

        if ( cConv == 0 && c != 0 )
        {
            /* Map to EUDC ranges: */
            ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
            for ( sal_uInt32 i = 0; i < pConvertData->mnEUDCCount; ++i, ++pEUDCTab )
            {
                if ( c < pEUDCTab->mnUniStart || c > pEUDCTab->mnUniEnd )
                    continue;

                /* split the index into a lead offset and an offset into */
                /* the concatenated trail ranges of that lead byte       */
                sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
                sal_uInt32 nLeadOff = nIndex / pEUDCTab->mnTrailRangeCount;
                sal_uInt32 nTrailOff = nIndex % pEUDCTab->mnTrailRangeCount;
                cConv = static_cast<sal_uInt16>((pEUDCTab->mnLeadStart + nLeadOff) << 8);

                sal_uInt32 nSize = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
                if ( nTrailOff < nSize )
                {
                    cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
                    break;
                }
                nTrailOff -= nSize;
                nSize = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
                if ( nTrailOff < nSize )
                {
                    cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
                    break;
                }
                nTrailOff -= nSize;
                cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
                break;
            }

            /* FIXME
             * SB: Not sure why this is in here. Plus, it does not work as
             * intended when (c & 0xFF) == 0, because the next !cConv check
             * will then think c has not yet been converted...
             */
            if ( c >= RTL_TEXTCVT_BYTE_PRIVATE_START
                 && c <= RTL_TEXTCVT_BYTE_PRIVATE_END )
            {
                /* Keep the value unsigned: going through (signed) char   */
                /* would sign-extend bytes >= 0x80 to 0xFFxx, which the   */
                /* output stage below then writes as a two-byte sequence. */
                if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
                    cConv = static_cast<sal_uInt16>(c & 0xFF);
            }
        }

        if ( cConv == 0 && c != 0 )
        {
            /* Handle undefined and surrogates characters */
            /* (all surrogates characters are undefined) */
            /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE/_REPLACESTR get */
            /* no treatment of their own in this function.              */
            if ( sal::detail::textenc::handleUndefinedUnicodeToTextChar(
                     &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf, nFlags,
                     pInfo) )
                continue;
            break;
        }

        /* SingleByte */
        if ( !(cConv & 0xFF00) )
        {
            if ( pDestBuf == pEndDestBuf )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
            pDestBuf++;
        }
        else
        {
            /* need room for two bytes; compare distances instead of */
            /* forming a pointer beyond the end of the buffer        */
            if ( pEndDestBuf - pDestBuf < 2 )
            {
                *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
                break;
            }

            *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
            pDestBuf++;
            *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
            pDestBuf++;
        }

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf - pSrcBuf);
    return (nDestBytes - (pEndDestBuf - pDestBuf));
}
|
|
|
|
#define JIS_EUC_LEAD_OFF 0x80
|
|
#define JIS_EUC_TRAIL_OFF 0x80
|
|
|
|
sal_Size ImplEUCJPToUnicode( const void* pData,
|
|
SAL_UNUSED_PARAMETER void*,
|
|
const char* pSrcBuf, sal_Size nSrcBytes,
|
|
sal_Unicode* pDestBuf, sal_Size nDestChars,
|
|
sal_uInt32 nFlags, sal_uInt32* pInfo,
|
|
sal_Size* pSrcCvtBytes )
|
|
{
|
|
unsigned char cLead = '\0';
|
|
unsigned char cTrail = '\0';
|
|
sal_Unicode cConv;
|
|
const ImplDBCSToUniLeadTab* pLeadEntry;
|
|
const ImplDBCSToUniLeadTab* pLeadTab;
|
|
const ImplEUCJPConvertData* pConvertData = static_cast<const ImplEUCJPConvertData*>(pData);
|
|
sal_Unicode* pEndDestBuf;
|
|
const char* pEndSrcBuf;
|
|
char const * startOfCurrentChar = pSrcBuf;
|
|
|
|
*pInfo = 0;
|
|
pEndDestBuf = pDestBuf+nDestChars;
|
|
pEndSrcBuf = pSrcBuf+nSrcBytes;
|
|
while ( pSrcBuf < pEndSrcBuf )
|
|
{
|
|
unsigned char c = static_cast<unsigned char>(*pSrcBuf);
|
|
|
|
/* ASCII */
|
|
if ( c <= 0x7F )
|
|
cConv = c;
|
|
else
|
|
{
|
|
/* SS2 - Half-width katakana */
|
|
/* 8E + A1-DF */
|
|
if ( c == 0x8E )
|
|
{
|
|
/* Source buffer too small */
|
|
if ( pSrcBuf + 1 == pEndSrcBuf )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
|
|
break;
|
|
}
|
|
|
|
pSrcBuf++;
|
|
c = static_cast<unsigned char>(*pSrcBuf);
|
|
if ( (c >= 0xA1) && (c <= 0xDF) )
|
|
cConv = 0xFF61+(c-0xA1);
|
|
else
|
|
{
|
|
cConv = 0;
|
|
cLead = 0x8E;
|
|
cTrail = c;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
/* SS3 - JIS 0212-1990 */
|
|
/* 8F + A1-FE + A1-FE */
|
|
if ( c == 0x8F )
|
|
{
|
|
/* Source buffer too small */
|
|
if (pEndSrcBuf - pSrcBuf < 3)
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
|
|
break;
|
|
}
|
|
|
|
pSrcBuf++;
|
|
cLead = static_cast<unsigned char>(*pSrcBuf);
|
|
pSrcBuf++;
|
|
cTrail = static_cast<unsigned char>(*pSrcBuf);
|
|
pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
|
|
}
|
|
/* CodeSet 2 JIS 0208-1997 */
|
|
/* A1-FE + A1-FE */
|
|
else
|
|
{
|
|
/* Source buffer too small */
|
|
if ( pSrcBuf + 1 == pEndSrcBuf )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOOSMALL;
|
|
break;
|
|
}
|
|
|
|
cLead = c;
|
|
pSrcBuf++;
|
|
cTrail = static_cast<unsigned char>(*pSrcBuf);
|
|
pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
|
|
}
|
|
|
|
/* Undefined Range */
|
|
if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
|
|
cConv = 0;
|
|
else
|
|
{
|
|
cLead -= JIS_EUC_LEAD_OFF;
|
|
cTrail -= JIS_EUC_TRAIL_OFF;
|
|
pLeadEntry = pLeadTab+cLead;
|
|
if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
|
|
cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
|
|
else
|
|
cConv = 0;
|
|
}
|
|
}
|
|
|
|
if ( !cConv )
|
|
{
|
|
/* We compare the full range of the trail we defined, */
|
|
/* which can often be greater than the limit. We do this */
|
|
/* so that extensions that don't consider encodings */
|
|
/* correctly treat double-byte characters as a single */
|
|
/* character as much as possible. */
|
|
|
|
if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
|
|
{
|
|
if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
|
|
++pSrcBuf;
|
|
} else {
|
|
pSrcBuf = startOfCurrentChar;
|
|
}
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
|
|
break;
|
|
}
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
|
|
{
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
continue;
|
|
}
|
|
cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
|
|
}
|
|
else
|
|
{
|
|
if ((nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0) {
|
|
++pSrcBuf;
|
|
} else {
|
|
pSrcBuf = startOfCurrentChar;
|
|
}
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
|
|
break;
|
|
}
|
|
if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
|
|
{
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
continue;
|
|
}
|
|
cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
|
|
}
|
|
}
|
|
}
|
|
|
|
if ( pDestBuf == pEndDestBuf )
|
|
{
|
|
*pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOOSMALL;
|
|
break;
|
|
}
|
|
|
|
*pDestBuf = cConv;
|
|
pDestBuf++;
|
|
pSrcBuf++;
|
|
startOfCurrentChar = pSrcBuf;
|
|
}
|
|
|
|
*pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
|
|
return (nDestChars - (pEndDestBuf-pDestBuf));
|
|
}
|
|
|
|
/* Convert Unicode text to EUC-JP.                                          */
/*                                                                          */
/* Each source code unit becomes a value of up to three bytes:              */
/*   U+0000-U+007F   the code unit itself                (1 byte)           */
/*   U+FF61-U+FF9F   0x8E followed by 0xA1 onwards       (2 bytes)          */
/*   JIS 0208 hit    table value with 0x8080 or-ed in    (2 bytes)          */
/*   JIS 0212 hit    table value with 0x8F8080 or-ed in  (3 bytes)          */
/* Anything else is handed to handleUndefinedUnicodeToTextChar.             */
/*                                                                          */
/* pData         ImplEUCJPConvertData holding the two high tables           */
/* pSrcBuf       source code units, nSrcChars long                          */
/* pDestBuf      destination buffer with room for nDestBytes bytes          */
/* nFlags        passed on to handleUndefinedUnicodeToTextChar              */
/* pInfo         reset to 0, then receives RTL_UNICODETOTEXT_INFO_* bits    */
/* pSrcCvtChars  receives the number of source code units consumed          */
/*                                                                          */
/* Returns the number of bytes written to pDestBuf.                         */
sal_Size ImplUnicodeToEUCJP( const void* pData,
                             SAL_UNUSED_PARAMETER void*,
                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
                             char* pDestBuf, sal_Size nDestBytes,
                             sal_uInt32 nFlags, sal_uInt32* pInfo,
                             sal_Size* pSrcCvtChars )
{
    const ImplEUCJPConvertData* pTables = static_cast<const ImplEUCJPConvertData*>(pData);
    char* pEndDestBuf = pDestBuf + nDestBytes;
    const sal_Unicode* pEndSrcBuf = pSrcBuf + nSrcChars;

    /* Fetch the entry for cChar from one high table; 0 means "no mapping" */
    /* (either the low byte is outside the entry's range or the slot is 0). */
    const auto lookUp = []( const ImplUniToDBCSHighTab* pTab, sal_Unicode cChar ) -> sal_uInt32
    {
        const unsigned char nHi = static_cast<unsigned char>((cChar >> 8) & 0xFF);
        const unsigned char nLo = static_cast<unsigned char>(cChar & 0xFF);
        const ImplUniToDBCSHighTab* pEntry = pTab + nHi;
        if ( (nLo < pEntry->mnLowStart) || (nLo > pEntry->mnLowEnd) )
            return 0;
        return pEntry->mpToUniTrailTab[nLo - pEntry->mnLowStart];
    };

    *pInfo = 0;
    while ( pSrcBuf < pEndSrcBuf )
    {
        const sal_Unicode cSrc = *pSrcBuf;
        sal_uInt32 nCode;

        if ( cSrc <= 0x7F )
        {
            /* ASCII */
            nCode = cSrc;
        }
        else if ( (cSrc >= 0xFF61) && (cSrc <= 0xFF9F) )
        {
            /* Half-width katakana */
            nCode = 0x8E00 + 0xA1 + (cSrc - 0xFF61);
        }
        else
        {
            /* JIS 0208 first ... */
            nCode = lookUp( pTables->mpUniToJIS0208HighTab, cSrc );
            if ( nCode != 0 )
                nCode |= 0x8080;
            else
            {
                /* ... then JIS 0212 */
                nCode = lookUp( pTables->mpUniToJIS0212HighTab, cSrc );
                if ( nCode != 0 )
                    nCode |= 0x8F8080;
                else
                {
                    /* Handle undefined and surrogates characters */
                    /* (all surrogates characters are undefined) */
                    const bool bGoOn =
                        sal::detail::textenc::handleUndefinedUnicodeToTextChar(
                            &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf,
                            nFlags, pInfo);
                    if ( bGoOn )
                        continue;
                    break;
                }
            }
        }

        /* one, two or three bytes, depending on the highest byte in use */
        int nOutBytes = 3;
        if ( !(nCode & 0xFFFF00) )
            nOutBytes = 1;
        else if ( !(nCode & 0xFF0000) )
            nOutBytes = 2;

        if ( pEndDestBuf - pDestBuf < nOutBytes )
        {
            *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
            break;
        }

        /* write the bytes, most significant one first */
        for ( int nShift = (nOutBytes - 1) * 8; nShift >= 0; nShift -= 8 )
        {
            *pDestBuf = static_cast< char >(static_cast< unsigned char >((nCode >> nShift) & 0xFF));
            pDestBuf++;
        }

        pSrcBuf++;
    }

    *pSrcCvtChars = nSrcChars - (pEndSrcBuf - pSrcBuf);
    return (nDestBytes - (pEndDestBuf - pDestBuf));
}
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|