office-gobmx/i18npool/source/characterclassification/cclass_unicode_parser.cxx
Michael Stahl 29f5742bc8 tdf#125154 i18npool,sw: fix group separators in numbers in formulas
Commit 776f7e7463 changed cclass_Unicode
to reject group separators in numbers by default, but users are
complaining that the neat "5.000" numbers in their existing documents
are now considered invalid.

* in SwCalc, use GROUP_SEPARATOR_IN_NUMBER
* in cclass_Unicode::parseText(), reject a group separator if it is not
  followed by at least 3 digits

With this, a number from tdf#42518 "0.19" is still considered invalid,
while "5.000" is now valid again.

Change-Id: If86f2ed4c27be16f866d7f4cee00789344e9ee2e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/153047
Tested-by: Michael Stahl <michael.stahl@allotropia.de>
Reviewed-by: Michael Stahl <michael.stahl@allotropia.de>
2023-06-16 16:22:22 +02:00

1076 lines
45 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <cclass_unicode.hxx>
#include <unicode/uchar.h>
#include <rtl/character.hxx>
#include <rtl/math.hxx>
#include <rtl/ustring.hxx>
#include <com/sun/star/i18n/KParseTokens.hpp>
#include <com/sun/star/i18n/KParseType.hpp>
#include <com/sun/star/i18n/LocaleData2.hpp>
#include <com/sun/star/i18n/NativeNumberMode.hpp>
#include <com/sun/star/i18n/NativeNumberSupplier.hpp>
#include <string.h>
#include <string_view>
using namespace ::com::sun::star::uno;
using namespace ::com::sun::star::i18n;
using namespace ::com::sun::star::lang;
// Flag set shared by every digit entry of the default parser table and by the
// Unicode number categories in getFlagsExtended(): a digit may start a value
// (CHAR_VALUE), continue one (VALUE), follow an exponent marker or its sign
// (VALUE_EXP, VALUE_EXP_VALUE), and counts as a digit proper (VALUE_DIGIT).
#define TOKEN_DIGIT_FLAGS (ParserFlags::CHAR_VALUE | ParserFlags::VALUE | ParserFlags::VALUE_EXP | ParserFlags::VALUE_EXP_VALUE | ParserFlags::VALUE_DIGIT)
namespace i18npool {
// Default identifier/name specification is [A-Za-z_][A-Za-z0-9_]*
//
// nDefCnt is the number of entries in the ASCII lookup tables of this file;
// code points >= nDefCnt are classified through ICU (u_charType) instead.
const sal_uInt8 cclass_Unicode::nDefCnt = 128;
// Default parser flags for the ASCII range, indexed by code point.
// initParserTable() copies this table into pTable and then adjusts the copy
// for the locale separators, the KParseTokens start/continuation types and the
// user-defined characters; this table itself is never modified.
const ParserFlags cclass_Unicode::pDefaultParserTable[ nDefCnt ] =
{
// (...) == Calc formula compiler specific, commented out and modified
/* \0 */ ParserFlags::EXCLUDED,
// 1..8: control characters
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
/* 9 \t */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL)
// 10: line feed
ParserFlags::ILLEGAL,
/* 11 \v */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL)
// 12..31: remaining control characters
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
ParserFlags::ILLEGAL,
/* 32 */ ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 33 ! */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 34 " */ ParserFlags::CHAR_STRING | ParserFlags::STRING_SEP,
/* 35 # */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD_SEP)
/* 36 $ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
/* 37 % */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::VALUE)
/* 38 & */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 39 ' */ ParserFlags::NAME_SEP,
/* 40 ( */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 41 ) */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 42 * */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 43 + */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
/* 44 , */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
/* 45 - */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP | ParserFlags::VALUE_EXP | ParserFlags::VALUE_SIGN,
/* 46 . */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD | ParserFlags::CHAR_VALUE | ParserFlags::VALUE)
/* 47 / */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
//for ( i = 48; i < 58; i++ )
/* 48 0 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 49 1 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 50 2 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 51 3 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 52 4 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 53 5 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 54 6 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 55 7 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 56 8 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 57 9 */ TOKEN_DIGIT_FLAGS | ParserFlags::WORD,
/* 58 : */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::WORD)
/* 59 ; */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 60 < */ ParserFlags::CHAR_BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 61 = */ ParserFlags::CHAR | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 62 > */ ParserFlags::CHAR_BOOL | ParserFlags::BOOL | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 63 ? */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::CHAR_WORD | ParserFlags::WORD)
/* 64 @ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
//for ( i = 65; i < 91; i++ )
/* 65 A */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 66 B */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 67 C */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 68 D */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 69 E */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 70 F */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 71 G */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 72 H */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 73 I */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 74 J */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 75 K */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 76 L */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 77 M */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 78 N */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 79 O */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 80 P */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 81 Q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 82 R */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 83 S */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 84 T */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 85 U */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 86 V */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 87 W */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 88 X */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 89 Y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 90 Z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 91 [ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 92 \ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 93 ] */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 94 ^ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP,
/* 95 _ */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 96 ` */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
//for ( i = 97; i < 123; i++ )
/* 97 a */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 98 b */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 99 c */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 100 d */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 101 e */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 102 f */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 103 g */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 104 h */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 105 i */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 106 j */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 107 k */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 108 l */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 109 m */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 110 n */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 111 o */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 112 p */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 113 q */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 114 r */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 115 s */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 116 t */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 117 u */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 118 v */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 119 w */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 120 x */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 121 y */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 122 z */ ParserFlags::CHAR_WORD | ParserFlags::WORD,
/* 123 { */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 124 | */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 125 } */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 126 ~ */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP, // (ParserFlags::ILLEGAL // UNUSED)
/* 127 */ ParserFlags::CHAR | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP // (ParserFlags::ILLEGAL // UNUSED)
};
// KParseTokens type bit for each ASCII code point, indexed by code point.
// getParseTokensType() returns the entry directly for c < nDefCnt; larger code
// points are mapped to the KParseTokens::UNI_... constants via ICU instead.
const sal_Int32 cclass_Unicode::pParseTokensType[ nDefCnt ] =
{
/* \0 */ KParseTokens::ASC_OTHER,
// 1..8: control characters
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
/* 9 \t */ KParseTokens::ASC_CONTROL,
// 10: line feed
KParseTokens::ASC_CONTROL,
/* 11 \v */ KParseTokens::ASC_CONTROL,
// 12..31: remaining control characters
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
KParseTokens::ASC_CONTROL,
/* 32 */ KParseTokens::ASC_OTHER,
/* 33 ! */ KParseTokens::ASC_OTHER,
/* 34 " */ KParseTokens::ASC_OTHER,
/* 35 # */ KParseTokens::ASC_OTHER,
/* 36 $ */ KParseTokens::ASC_DOLLAR,
/* 37 % */ KParseTokens::ASC_OTHER,
/* 38 & */ KParseTokens::ASC_OTHER,
/* 39 ' */ KParseTokens::ASC_OTHER,
/* 40 ( */ KParseTokens::ASC_OTHER,
/* 41 ) */ KParseTokens::ASC_OTHER,
/* 42 * */ KParseTokens::ASC_OTHER,
/* 43 + */ KParseTokens::ASC_OTHER,
/* 44 , */ KParseTokens::ASC_OTHER,
/* 45 - */ KParseTokens::ASC_OTHER,
/* 46 . */ KParseTokens::ASC_DOT,
/* 47 / */ KParseTokens::ASC_OTHER,
//for ( i = 48; i < 58; i++ )
/* 48 0 */ KParseTokens::ASC_DIGIT,
/* 49 1 */ KParseTokens::ASC_DIGIT,
/* 50 2 */ KParseTokens::ASC_DIGIT,
/* 51 3 */ KParseTokens::ASC_DIGIT,
/* 52 4 */ KParseTokens::ASC_DIGIT,
/* 53 5 */ KParseTokens::ASC_DIGIT,
/* 54 6 */ KParseTokens::ASC_DIGIT,
/* 55 7 */ KParseTokens::ASC_DIGIT,
/* 56 8 */ KParseTokens::ASC_DIGIT,
/* 57 9 */ KParseTokens::ASC_DIGIT,
/* 58 : */ KParseTokens::ASC_COLON,
/* 59 ; */ KParseTokens::ASC_OTHER,
/* 60 < */ KParseTokens::ASC_OTHER,
/* 61 = */ KParseTokens::ASC_OTHER,
/* 62 > */ KParseTokens::ASC_OTHER,
/* 63 ? */ KParseTokens::ASC_OTHER,
/* 64 @ */ KParseTokens::ASC_OTHER,
//for ( i = 65; i < 91; i++ )
/* 65 A */ KParseTokens::ASC_UPALPHA,
/* 66 B */ KParseTokens::ASC_UPALPHA,
/* 67 C */ KParseTokens::ASC_UPALPHA,
/* 68 D */ KParseTokens::ASC_UPALPHA,
/* 69 E */ KParseTokens::ASC_UPALPHA,
/* 70 F */ KParseTokens::ASC_UPALPHA,
/* 71 G */ KParseTokens::ASC_UPALPHA,
/* 72 H */ KParseTokens::ASC_UPALPHA,
/* 73 I */ KParseTokens::ASC_UPALPHA,
/* 74 J */ KParseTokens::ASC_UPALPHA,
/* 75 K */ KParseTokens::ASC_UPALPHA,
/* 76 L */ KParseTokens::ASC_UPALPHA,
/* 77 M */ KParseTokens::ASC_UPALPHA,
/* 78 N */ KParseTokens::ASC_UPALPHA,
/* 79 O */ KParseTokens::ASC_UPALPHA,
/* 80 P */ KParseTokens::ASC_UPALPHA,
/* 81 Q */ KParseTokens::ASC_UPALPHA,
/* 82 R */ KParseTokens::ASC_UPALPHA,
/* 83 S */ KParseTokens::ASC_UPALPHA,
/* 84 T */ KParseTokens::ASC_UPALPHA,
/* 85 U */ KParseTokens::ASC_UPALPHA,
/* 86 V */ KParseTokens::ASC_UPALPHA,
/* 87 W */ KParseTokens::ASC_UPALPHA,
/* 88 X */ KParseTokens::ASC_UPALPHA,
/* 89 Y */ KParseTokens::ASC_UPALPHA,
/* 90 Z */ KParseTokens::ASC_UPALPHA,
/* 91 [ */ KParseTokens::ASC_OTHER,
/* 92 \ */ KParseTokens::ASC_OTHER,
/* 93 ] */ KParseTokens::ASC_OTHER,
/* 94 ^ */ KParseTokens::ASC_OTHER,
/* 95 _ */ KParseTokens::ASC_UNDERSCORE,
/* 96 ` */ KParseTokens::ASC_OTHER,
//for ( i = 97; i < 123; i++ )
/* 97 a */ KParseTokens::ASC_LOALPHA,
/* 98 b */ KParseTokens::ASC_LOALPHA,
/* 99 c */ KParseTokens::ASC_LOALPHA,
/* 100 d */ KParseTokens::ASC_LOALPHA,
/* 101 e */ KParseTokens::ASC_LOALPHA,
/* 102 f */ KParseTokens::ASC_LOALPHA,
/* 103 g */ KParseTokens::ASC_LOALPHA,
/* 104 h */ KParseTokens::ASC_LOALPHA,
/* 105 i */ KParseTokens::ASC_LOALPHA,
/* 106 j */ KParseTokens::ASC_LOALPHA,
/* 107 k */ KParseTokens::ASC_LOALPHA,
/* 108 l */ KParseTokens::ASC_LOALPHA,
/* 109 m */ KParseTokens::ASC_LOALPHA,
/* 110 n */ KParseTokens::ASC_LOALPHA,
/* 111 o */ KParseTokens::ASC_LOALPHA,
/* 112 p */ KParseTokens::ASC_LOALPHA,
/* 113 q */ KParseTokens::ASC_LOALPHA,
/* 114 r */ KParseTokens::ASC_LOALPHA,
/* 115 s */ KParseTokens::ASC_LOALPHA,
/* 116 t */ KParseTokens::ASC_LOALPHA,
/* 117 u */ KParseTokens::ASC_LOALPHA,
/* 118 v */ KParseTokens::ASC_LOALPHA,
/* 119 w */ KParseTokens::ASC_LOALPHA,
/* 120 x */ KParseTokens::ASC_LOALPHA,
/* 121 y */ KParseTokens::ASC_LOALPHA,
/* 122 z */ KParseTokens::ASC_LOALPHA,
/* 123 { */ KParseTokens::ASC_OTHER,
/* 124 | */ KParseTokens::ASC_OTHER,
/* 125 } */ KParseTokens::ASC_OTHER,
/* 126 ~ */ KParseTokens::ASC_OTHER,
/* 127 */ KParseTokens::ASC_OTHER
};
// static
// Locate the first occurrence of code point c in the NUL-terminated UTF-16
// string pStr. The code point is split with rtl::splitSurrogates(); when that
// yields more than one unit, both consecutive units have to match.
// Returns a pointer to the (first unit of the) match, or nullptr if pStr is
// null or c does not occur.
const sal_Unicode* cclass_Unicode::StrChr( const sal_Unicode* pStr, sal_uInt32 c )
{
    if ( pStr == nullptr )
        return nullptr;

    sal_Unicode aUnits[2];
    auto const nUnits = rtl::splitSurrogates(c, aUnits);

    for ( const sal_Unicode* pCur = pStr; *pCur; ++pCur )
    {
        if ( *pCur != aUnits[0] )
            continue;
        // Reading pCur[1] is safe: *pCur is not the terminator, so at worst
        // pCur[1] is the terminating NUL.
        if ( nUnits == 1 || pCur[1] == aUnits[1] )
            return pCur;
    }
    return nullptr;
}
// Return the KParseTokens type bit describing code point c.
//
// ASCII code points (c < nDefCnt) are looked up in pParseTokensType; all
// others are mapped from their ICU general category to the matching
// KParseTokens::UNI_... constant.
//
// @param c        code point to classify
// @param isFirst  true if c is the first code point of the token; combining
//                 marks are then not reported as letters
// @return one KParseTokens::ASC_... or KParseTokens::UNI_... constant,
//         KParseTokens::UNI_OTHER if no specific category applies
sal_Int32 cclass_Unicode::getParseTokensType(sal_uInt32 const c, bool const isFirst)
{
    if ( c < nDefCnt )
        return pParseTokensType[ sal_uInt8(c) ];

    //! all KParseTokens::UNI_... must be matched
    switch (u_charType(c))
    {
        case U_UPPERCASE_LETTER :
            return KParseTokens::UNI_UPALPHA;
        case U_LOWERCASE_LETTER :
            return KParseTokens::UNI_LOALPHA;
        case U_TITLECASE_LETTER :
            return KParseTokens::UNI_TITLE_ALPHA;
        case U_MODIFIER_LETTER :
            return KParseTokens::UNI_MODIFIER_LETTER;
        case U_NON_SPACING_MARK :
        case U_COMBINING_SPACING_MARK :
            // A combining mark (non-spacing or spacing) cannot be a leading
            // character; same rule as in getFlagsExtended().
            if (isFirst)
                break;
            [[fallthrough]]; // otherwise treat it as Other_Letter.
        case U_OTHER_LETTER :
            // Other_Letter is a letter in any position, including the first.
            return KParseTokens::UNI_OTHER_LETTER;
        case U_DECIMAL_DIGIT_NUMBER :
            return KParseTokens::UNI_DIGIT;
        case U_LETTER_NUMBER :
            return KParseTokens::UNI_LETTER_NUMBER;
        case U_OTHER_NUMBER :
            return KParseTokens::UNI_OTHER_NUMBER;
        default:
            break;
    }
    return KParseTokens::UNI_OTHER;
}
void cclass_Unicode::setupInternational( const Locale& rLocale )
{
bool bChanged = (aParserLocale.Language != rLocale.Language
|| aParserLocale.Country != rLocale.Country
|| aParserLocale.Variant != rLocale.Variant);
if ( bChanged )
{
aParserLocale.Language = rLocale.Language;
aParserLocale.Country = rLocale.Country;
aParserLocale.Variant = rLocale.Variant;
}
if ( !mxLocaleData.is() )
{
mxLocaleData.set( LocaleData2::create(m_xContext) );
}
}
// Ensure the parser table matches the requested configuration.
// initParserTable() is only invoked when no table exists yet or when the
// locale, one of the token type masks, or one of the user-defined character
// strings differs from what the current table was built with.
void cclass_Unicode::setupParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
        const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
        const OUString& userDefinedCharactersCont )
{
    bool const bLocaleDiffers = rLocale.Language != aParserLocale.Language
        || rLocale.Country != aParserLocale.Country
        || rLocale.Variant != aParserLocale.Variant;

    bool const bSettingsDiffer = startCharTokenType != nStartTypes
        || contCharTokenType != nContTypes
        || userDefinedCharactersStart != aStartChars
        || userDefinedCharactersCont != aContChars;

    if ( pTable && !bLocaleDiffers && !bSettingsDiffer )
        return; // cached table is still valid

    initParserTable( rLocale, startCharTokenType, userDefinedCharactersStart,
            contCharTokenType, userDefinedCharactersCont );
}
void cclass_Unicode::initParserTable( const Locale& rLocale, sal_Int32 startCharTokenType,
const OUString& userDefinedCharactersStart, sal_Int32 contCharTokenType,
const OUString& userDefinedCharactersCont )
{
// (Re)Init
setupInternational( rLocale );
// Memory of pTable is reused.
if ( !pTable )
pTable.reset(new ParserFlags[nDefCnt]);
memcpy( pTable.get(), pDefaultParserTable, sizeof(ParserFlags) * nDefCnt );
// Start and cont tables only need reallocation if different length.
if ( pStart && userDefinedCharactersStart.getLength() != aStartChars.getLength() )
{
pStart.reset();
}
if ( pCont && userDefinedCharactersCont.getLength() != aContChars.getLength() )
{
pCont.reset();
}
nStartTypes = startCharTokenType;
nContTypes = contCharTokenType;
aStartChars = userDefinedCharactersStart;
aContChars = userDefinedCharactersCont;
// specials
if( mxLocaleData.is() )
{
LocaleDataItem2 aItem =
mxLocaleData->getLocaleItem2( aParserLocale );
//!TODO: theoretically separators may be a string, adjustment would have to be
//! done here and in parsing and in ::rtl::math::stringToDouble()
cGroupSep = aItem.thousandSeparator[0];
cDecimalSep = aItem.decimalSeparator[0];
cDecimalSepAlt = aItem.decimalSeparatorAlternative.toChar();
}
if (nContTypes & KParseTokens::GROUP_SEPARATOR_IN_NUMBER)
{
if ( cGroupSep < nDefCnt )
pTable[cGroupSep] |= ParserFlags::VALUE;
}
else
{
cGroupSep = 0;
}
if ( cDecimalSep < nDefCnt )
pTable[cDecimalSep] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
if ( cDecimalSepAlt && cDecimalSepAlt < nDefCnt )
pTable[cDecimalSepAlt] |= ParserFlags::CHAR_VALUE | ParserFlags::VALUE;
// Modify characters according to KParseTokens definitions.
{
using namespace KParseTokens;
sal_uInt8 i;
if ( !(nStartTypes & ASC_UPALPHA) )
for ( i = 65; i < 91; i++ )
pTable[i] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
if ( !(nContTypes & ASC_UPALPHA) )
for ( i = 65; i < 91; i++ )
pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
if ( !(nStartTypes & ASC_LOALPHA) )
for ( i = 97; i < 123; i++ )
pTable[i] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
if ( !(nContTypes & ASC_LOALPHA) )
for ( i = 97; i < 123; i++ )
pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
if ( nStartTypes & ASC_DIGIT )
for ( i = 48; i < 58; i++ )
pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
if ( !(nContTypes & ASC_DIGIT) )
for ( i = 48; i < 58; i++ )
pTable[i] &= ~ParserFlags::WORD; // not allowed as cont character
if ( !(nStartTypes & ASC_UNDERSCORE) )
pTable[95] &= ~ParserFlags::CHAR_WORD; // not allowed as start character
if ( !(nContTypes & ASC_UNDERSCORE) )
pTable[95] &= ~ParserFlags::WORD; // not allowed as cont character
if ( nStartTypes & ASC_DOLLAR )
pTable[36] |= ParserFlags::CHAR_WORD; // allowed as start character
if ( nContTypes & ASC_DOLLAR )
pTable[36] |= ParserFlags::WORD; // allowed as cont character
if ( nStartTypes & ASC_DOT )
pTable[46] |= ParserFlags::CHAR_WORD; // allowed as start character
if ( nContTypes & ASC_DOT )
pTable[46] |= ParserFlags::WORD; // allowed as cont character
if ( nStartTypes & ASC_COLON )
pTable[58] |= ParserFlags::CHAR_WORD; // allowed as start character
if ( nContTypes & ASC_COLON )
pTable[58] |= ParserFlags::WORD; // allowed as cont character
if ( nStartTypes & ASC_CONTROL )
for ( i = 1; i < 32; i++ )
pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
if ( nContTypes & ASC_CONTROL )
for ( i = 1; i < 32; i++ )
pTable[i] |= ParserFlags::WORD; // allowed as cont character
if ( nStartTypes & ASC_ANY_BUT_CONTROL )
for ( i = 32; i < nDefCnt; i++ )
pTable[i] |= ParserFlags::CHAR_WORD; // allowed as start character
if ( nContTypes & ASC_ANY_BUT_CONTROL )
for ( i = 32; i < nDefCnt; i++ )
pTable[i] |= ParserFlags::WORD; // allowed as cont character
}
// Merge in (positively override with) user defined characters.
// StartChars
sal_Int32 nLen = aStartChars.getLength();
if ( nLen )
{
if ( !pStart )
pStart.reset(new ParserFlags[ nLen ]);
const sal_Unicode* p = aStartChars.getStr();
for ( sal_Int32 j=0; j<nLen; j++, p++ )
{
pStart[j] = ParserFlags::CHAR_WORD;
if ( *p < nDefCnt )
pTable[*p] |= ParserFlags::CHAR_WORD;
}
}
// ContChars
nLen = aContChars.getLength();
if ( nLen )
{
if ( !pCont )
pCont.reset(new ParserFlags[ nLen ]);
const sal_Unicode* p = aContChars.getStr();
for ( sal_Int32 j=0; j<nLen; j++ )
{
pCont[j] = ParserFlags::WORD;
if ( *p < nDefCnt )
pTable[*p] |= ParserFlags::WORD;
}
}
}
// Release the ASCII parser table and both user-defined character flag arrays.
// The three buffers are independent, so the order of release does not matter.
void cclass_Unicode::destroyParserTable()
{
    pTable.reset();
    pStart.reset();
    pCont.reset();
}
// Return the parser flags of code point c for scan state eState.
//
// The base flags come from pTable (c < nDefCnt) or getFlagsExtended().
// If the state expects a start character and CHAR_WORD is not set, the
// user-defined start characters are consulted; likewise WORD and the
// user-defined continuation characters for the value/word states. When the
// user-defined set supplies the missing flag, EXCLUDED is cleared.
ParserFlags cclass_Unicode::getFlags(sal_uInt32 const c, const cclass_Unicode::ScanState eState)
{
    ParserFlags nFlags;
    if ( c >= nDefCnt )
        nFlags = getFlagsExtended(c, eState);
    else
        nFlags = pTable[ sal_uInt8(c) ];

    bool const bWantsStartChar = eState == ssGetChar
        || eState == ssRewindFromValue
        || eState == ssIgnoreLeadingInRewind
        || eState == ssGetWordFirstChar;
    bool const bWantsContChar = eState == ssGetValue || eState == ssGetWord;

    if ( bWantsStartChar )
    {
        if ( !(nFlags & ParserFlags::CHAR_WORD) )
        {
            nFlags |= getStartCharsFlags( c );
            if ( nFlags & ParserFlags::CHAR_WORD )
                nFlags &= ~ParserFlags::EXCLUDED;
        }
    }
    else if ( bWantsContChar )
    {
        if ( !(nFlags & ParserFlags::WORD) )
        {
            nFlags |= getContCharsFlags( c );
            if ( nFlags & ParserFlags::WORD )
                nFlags &= ~ParserFlags::EXCLUDED;
        }
    }
    // all other states use the base flags unchanged

    return nFlags;
}
// Compute parser flags for a code point that is not covered by pTable.
//
// Locale separators are recognised first. Everything else is classified by
// its ICU general category and accepted as word start (CHAR_WORD) or word
// continuation (WORD) only if the matching KParseTokens::UNI_... bit is set in
// nStartTypes / nContTypes, depending on whether eState is a start state.
// Number categories additionally carry TOKEN_DIGIT_FLAGS.
ParserFlags cclass_Unicode::getFlagsExtended(sal_uInt32 const c, const cclass_Unicode::ScanState eState) const
{
    if ( c == cGroupSep )
        return ParserFlags::VALUE;
    if ( c == cDecimalSep || (cDecimalSepAlt && c == cDecimalSepAlt) )
        return ParserFlags::CHAR_VALUE | ParserFlags::VALUE;

    bool const bLeading = (eState == ssGetChar || eState == ssGetWordFirstChar ||
            eState == ssRewindFromValue || eState == ssIgnoreLeadingInRewind);
    sal_Int32 const nAllowed = bLeading ? nStartTypes : nContTypes;
    ParserFlags const nWordFlag = bLeading ? ParserFlags::CHAR_WORD : ParserFlags::WORD;

    // Word flag for the current position if nTokenBit is enabled, else ILLEGAL.
    auto const wordIfAllowed = [nAllowed, nWordFlag](sal_Int32 nTokenBit) -> ParserFlags
    {
        return (nAllowed & nTokenBit) ? nWordFlag : ParserFlags::ILLEGAL;
    };

    //! all KParseTokens::UNI_... must be matched
    switch (u_charType(c))
    {
        case U_UPPERCASE_LETTER :
            return wordIfAllowed(KParseTokens::UNI_UPALPHA);
        case U_LOWERCASE_LETTER :
            return wordIfAllowed(KParseTokens::UNI_LOALPHA);
        case U_TITLECASE_LETTER :
            return wordIfAllowed(KParseTokens::UNI_TITLE_ALPHA);
        case U_MODIFIER_LETTER :
            return wordIfAllowed(KParseTokens::UNI_MODIFIER_LETTER);
        case U_NON_SPACING_MARK :
        case U_COMBINING_SPACING_MARK :
            // Neither a non-spacing mark nor a spacing combining mark may
            // lead a token.
            if (bLeading)
                return ParserFlags::ILLEGAL;
            [[fallthrough]]; // in continuation, handle like Other_Letter.
        case U_OTHER_LETTER :
            return wordIfAllowed(KParseTokens::UNI_OTHER_LETTER);
        case U_DECIMAL_DIGIT_NUMBER :
            return wordIfAllowed(KParseTokens::UNI_DIGIT) | TOKEN_DIGIT_FLAGS;
        case U_LETTER_NUMBER :
            return wordIfAllowed(KParseTokens::UNI_LETTER_NUMBER) | TOKEN_DIGIT_FLAGS;
        case U_OTHER_NUMBER :
            return wordIfAllowed(KParseTokens::UNI_OTHER_NUMBER) | TOKEN_DIGIT_FLAGS;
        case U_SPACE_SEPARATOR :
            if (nAllowed & KParseTokens::IGNORE_LEADING_WS)
                return ParserFlags::CHAR_DONTCARE;
            if (bLeading)
                return ParserFlags::CHAR_WORD;
            return ParserFlags::CHAR_DONTCARE | ParserFlags::WORD_SEP | ParserFlags::VALUE_SEP;
        case U_OTHER_PUNCTUATION:
        {
            // fdo#61754 Mid-letter punctuation (for example U+00B7 MIDDLE DOT)
            // may continue a word, similar to U_NON_SPACING_MARK, but never
            // start one.
            if (bLeading)
                return ParserFlags::ILLEGAL;
            // tdf#123575 U+30FB KATAKANA MIDDLE DOT has word break property
            // U_WB_KATAKANA rather than U_WB_MIDLETTER, so it and U+FF65
            // HALFWIDTH KATAKANA MIDDLE DOT are tested explicitly.
            bool const bMidLetter =
                U_WB_MIDLETTER == u_getIntPropertyValue(c, UCHAR_WORD_BREAK)
                || c == 0x30FB || c == 0xFF65;
            if (!bMidLetter)
                return ParserFlags::ILLEGAL;
            // allowing it to continue the word (nWordFlag is WORD here)
            return wordIfAllowed(KParseTokens::UNI_OTHER_LETTER);
        }
    }
    return ParserFlags::ILLEGAL;
}
// Look c up among the user-defined start characters (aStartChars) and return
// the flags stored for it in pStart, or ILLEGAL if there is no such table or
// c is not one of those characters.
ParserFlags cclass_Unicode::getStartCharsFlags( sal_uInt32 c )
{
    if ( !pStart )
        return ParserFlags::ILLEGAL;

    const sal_Unicode* const pBegin = aStartChars.getStr();
    const sal_Unicode* const pHit = StrChr( pBegin, c );
    if ( !pHit )
        return ParserFlags::ILLEGAL;

    // pStart is indexed by UTF-16 unit position within aStartChars.
    return pStart[ pHit - pBegin ];
}
// Look c up among the user-defined continuation characters (aContChars) and
// return the flags stored for it in pCont, or ILLEGAL if there is no such
// table or c is not one of those characters.
// NOTE(review): c is a sal_Unicode here, whereas getStartCharsFlags() takes a
// sal_uInt32 and getFlags() passes a sal_uInt32 code point; a code point that
// does not fit into sal_Unicode would be narrowed at the call. Confirm against
// the declaration in the header.
ParserFlags cclass_Unicode::getContCharsFlags( sal_Unicode c )
{
    if ( !pCont )
        return ParserFlags::ILLEGAL;

    const sal_Unicode* const pBegin = aContChars.getStr();
    const sal_Unicode* const pHit = StrChr( pBegin, c );
    if ( !pHit )
        return ParserFlags::ILLEGAL;

    // pCont is indexed by UTF-16 unit position within aContChars.
    return pCont[ pHit - pBegin ];
}
void cclass_Unicode::parseText( ParseResult& r, const OUString& rText, sal_Int32 nPos, sal_Int32 nTokenType )
{
assert(r.LeadingWhiteSpace == 0);
ScanState eState = ssGetChar;
//! All the variables below (plus ParseResult) have to be reset on ssRewindFromValue!
OUStringBuffer aSymbol;
bool isFirst(true);
sal_Int32 index(nPos); // index of next code point after current
sal_Int32 postSymbolIndex(index); // index of code point following last quote
sal_uInt32 current((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
sal_uInt32 cLast = 0;
sal_Int32 nCodePoints(0);
int nDecSeps = 0;
bool bQuote = false;
bool bMightBeWord = true;
bool bMightBeWordLast = true;
bool bDecSepAltUsed = false;
//! All the variables above (plus ParseResult) have to be reset on ssRewindFromValue!
sal_Int32 nextCharIndex(nPos); // == index of nextChar
while ((current != 0) && (eState != ssStop))
{
++nCodePoints;
ParserFlags nMask = getFlags(current, eState);
if ( nMask & ParserFlags::EXCLUDED )
eState = ssBounce;
if ( bMightBeWord )
{ // only relevant for ssGetValue fall back
if ( eState == ssGetChar || eState == ssRewindFromValue ||
eState == ssIgnoreLeadingInRewind )
bMightBeWord = bool(nMask & ParserFlags::CHAR_WORD);
else
bMightBeWord = bool(nMask & ParserFlags::WORD);
}
sal_Int32 nParseTokensType = getParseTokensType(current, isFirst);
isFirst = false;
sal_Int32 const nextIndex(nextCharIndex); // == index of char following current
nextCharIndex = index; // == index of nextChar
sal_uInt32 nextChar((index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0);
switch (eState)
{
case ssGetChar :
case ssRewindFromValue :
case ssIgnoreLeadingInRewind :
{
if ( (nMask & ParserFlags::CHAR_VALUE) && eState != ssRewindFromValue
&& eState != ssIgnoreLeadingInRewind )
{ //! must be first, may fall back to ssGetWord via bMightBeWord
eState = ssGetValue;
if ( nMask & ParserFlags::VALUE_DIGIT )
{
if (128 <= current)
r.TokenType = KParseType::UNI_NUMBER;
else
r.TokenType = KParseType::ASC_NUMBER;
}
else if (current == cDecimalSep || (bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt)))
{
if (nextChar)
++nDecSeps;
else
eState = ssRewindFromValue;
// retry for ONE_SINGLE_CHAR or others
}
}
else if ( nMask & ParserFlags::CHAR_WORD )
{
eState = ssGetWord;
r.TokenType = KParseType::IDENTNAME;
}
else if ( nMask & ParserFlags::NAME_SEP )
{
eState = ssGetWordFirstChar;
bQuote = true;
postSymbolIndex = nextCharIndex;
nParseTokensType = 0; // will be taken of first real character
r.TokenType = KParseType::SINGLE_QUOTE_NAME;
}
else if ( nMask & ParserFlags::CHAR_STRING )
{
eState = ssGetString;
postSymbolIndex = nextCharIndex;
nParseTokensType = 0; // will be taken of first real character
r.TokenType = KParseType::DOUBLE_QUOTE_STRING;
}
else if ( nMask & ParserFlags::CHAR_DONTCARE )
{
if ( nStartTypes & KParseTokens::IGNORE_LEADING_WS )
{
if (eState == ssRewindFromValue)
eState = ssIgnoreLeadingInRewind;
r.LeadingWhiteSpace = nextCharIndex - nPos;
nCodePoints--; // exclude leading whitespace
postSymbolIndex = nextCharIndex;
nParseTokensType = 0; // wait until real character
bMightBeWord = true;
}
else
eState = ssBounce;
}
else if ( nMask & ParserFlags::CHAR_BOOL )
{
eState = ssGetBool;
r.TokenType = KParseType::BOOLEAN;
}
else if ( nMask & ParserFlags::CHAR )
{ //! must be last
eState = ssStop;
r.TokenType = KParseType::ONE_SINGLE_CHAR;
}
else
eState = ssBounce; // not known
}
break;
case ssGetValue :
{
if ( nMask & ParserFlags::VALUE_DIGIT )
{
if (128 <= current)
r.TokenType = KParseType::UNI_NUMBER;
else if ( r.TokenType != KParseType::UNI_NUMBER )
r.TokenType = KParseType::ASC_NUMBER;
}
if ( nMask & ParserFlags::VALUE )
{
if (current == cGroupSep)
{
// accept only if it is followed by 3 digits
sal_Int32 tempIndex(index);
sal_uInt32 const nextChar2((tempIndex < rText.getLength()) ? rText.iterateCodePoints(&tempIndex) : 0);
sal_uInt32 const nextChar3((tempIndex < rText.getLength()) ? rText.iterateCodePoints(&tempIndex) : 0);
if (getFlags(nextChar, eState) & ParserFlags::VALUE_DIGIT
&& getFlags(nextChar2, eState) & ParserFlags::VALUE_DIGIT
&& getFlags(nextChar3, eState) & ParserFlags::VALUE_DIGIT)
{
nParseTokensType |= KParseTokens::GROUP_SEPARATOR_IN_NUMBER;
}
else
{
// Trailing group separator character is not a
// group separator.
eState = ssStopBack;
}
}
else if ((current == cDecimalSep ||
(bDecSepAltUsed = (cDecimalSepAlt && current == cDecimalSepAlt))) &&
++nDecSeps > 1)
{
if (nCodePoints == 2)
eState = ssRewindFromValue;
// consecutive separators
else
eState = ssStopBack;
}
// else keep it going
}
else if (current == 'E' || current == 'e')
{
ParserFlags nNext = getFlags(nextChar, eState);
if ( nNext & ParserFlags::VALUE_EXP )
; // keep it going
else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
{ // might be a numerical name (1.2efg)
eState = ssGetWord;
r.TokenType = KParseType::IDENTNAME;
}
else
eState = ssStopBack;
}
else if ( nMask & ParserFlags::VALUE_SIGN )
{
if ( (cLast == 'E') || (cLast == 'e') )
{
ParserFlags nNext = getFlags(nextChar, eState);
if ( nNext & ParserFlags::VALUE_EXP_VALUE )
; // keep it going
else if (bMightBeWord && ((nNext & ParserFlags::WORD) || !nextChar))
{ // might be a numerical name (1.2e+fg)
eState = ssGetWord;
r.TokenType = KParseType::IDENTNAME;
}
else
eState = ssStopBack;
}
else if ( bMightBeWord )
{ // might be a numerical name (1.2+fg)
eState = ssGetWord;
r.TokenType = KParseType::IDENTNAME;
}
else
eState = ssStopBack;
}
else if ( bMightBeWord && (nMask & ParserFlags::WORD) )
{ // might be a numerical name (1995.A1)
eState = ssGetWord;
r.TokenType = KParseType::IDENTNAME;
}
else
eState = ssStopBack;
}
break;
case ssGetWordFirstChar :
eState = ssGetWord;
[[fallthrough]];
case ssGetWord :
{
if ( nMask & ParserFlags::WORD )
; // keep it going
else if ( nMask & ParserFlags::NAME_SEP )
{
if ( bQuote )
{
if ( cLast == '\\' )
{ // escaped
aSymbol.append(
OUString::Concat(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 2))
+ OUString(&current, 1));
}
else
{
eState = ssStop;
aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
}
postSymbolIndex = nextCharIndex;
}
else
eState = ssStopBack;
}
else if ( bQuote )
; // keep it going
else
eState = ssStopBack;
}
break;
case ssGetString :
{
if ( nMask & ParserFlags::STRING_SEP )
{
if ( cLast == '\\' )
{ // escaped
aSymbol.append(
rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 2)
+ OUString(&current, 1));
}
else if (current == nextChar &&
!(nContTypes & KParseTokens::TWO_DOUBLE_QUOTES_BREAK_STRING) )
{ // "" => literal " escaped
aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex));
nextCharIndex = index;
if (index < rText.getLength()) { ++nCodePoints; }
nextChar = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
}
else
{
eState = ssStop;
aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
}
postSymbolIndex = nextCharIndex;
}
}
break;
case ssGetBool :
{
if ( nMask & ParserFlags::BOOL )
eState = ssStop; // maximum 2: <, >, <>, <=, >=
else
eState = ssStopBack;
}
break;
case ssStopBack :
case ssBounce :
case ssStop :
; // nothing, no compiler warning
break;
}
if ( eState == ssRewindFromValue )
{
r = ParseResult();
index = nPos;
postSymbolIndex = nPos;
nextCharIndex = nPos;
aSymbol.setLength(0);
current = (index < rText.getLength()) ? rText.iterateCodePoints(&index) : 0;
nCodePoints = (nPos < rText.getLength()) ? 1 : 0;
isFirst = true;
cLast = 0;
nDecSeps = 0;
bQuote = false;
bMightBeWord = true;
bMightBeWordLast = true;
bDecSepAltUsed = false;
}
else
{
if ( !(r.TokenType & nTokenType) )
{
if ( (r.TokenType & (KParseType::ASC_NUMBER | KParseType::UNI_NUMBER))
&& (nTokenType & KParseType::IDENTNAME) && bMightBeWord )
; // keep a number that might be a word
else if (r.LeadingWhiteSpace == (nextCharIndex - nPos))
; // keep ignored white space
else if ( !r.TokenType && eState == ssGetValue && (nMask & ParserFlags::VALUE_SEP) )
; // keep uncertain value
else
eState = ssBounce;
}
if ( eState == ssBounce )
{
r.TokenType = 0;
eState = ssStopBack;
}
if ( eState == ssStopBack )
{ // put back
nextChar = rText.iterateCodePoints(&index, -1);
nextCharIndex = nextIndex;
--nCodePoints;
bMightBeWord = bMightBeWordLast;
eState = ssStop;
}
if ( eState != ssStop )
{
if ( !r.StartFlags )
r.StartFlags |= nParseTokensType;
else
r.ContFlags |= nParseTokensType;
}
bMightBeWordLast = bMightBeWord;
cLast = current;
current = nextChar;
}
}
// r.CharLen is the length in characters (not code units) of the parsed
// token not including any leading white space.
r.CharLen = nCodePoints;
r.EndPos = nextCharIndex;
if ( r.TokenType & KParseType::ASC_NUMBER )
{
r.Value = rtl_math_uStringToDouble(rText.getStr() + nPos + r.LeadingWhiteSpace,
rText.getStr() + r.EndPos, (bDecSepAltUsed ? cDecimalSepAlt : cDecimalSep), cGroupSep, nullptr, nullptr);
if ( bMightBeWord )
r.TokenType |= KParseType::IDENTNAME;
}
else if ( r.TokenType & KParseType::UNI_NUMBER )
{
if ( !xNatNumSup.is() )
{
if ( m_xContext.is() )
{
xNatNumSup = NativeNumberSupplier::create( m_xContext );
}
}
OUString aTmp(rText.getStr() + nPos + r.LeadingWhiteSpace,
r.EndPos - nPos - r.LeadingWhiteSpace);
// transliterate to ASCII
aTmp = xNatNumSup->getNativeNumberString( aTmp, aParserLocale,
NativeNumberMode::NATNUM0 );
r.Value = ::rtl::math::stringToDouble( aTmp, cDecimalSep, cGroupSep );
if ( bMightBeWord )
r.TokenType |= KParseType::IDENTNAME;
}
else if ( r.TokenType & (KParseType::SINGLE_QUOTE_NAME | KParseType::DOUBLE_QUOTE_STRING) )
{
if (postSymbolIndex < nextCharIndex)
{ //! open quote
aSymbol.append(rText.subView(postSymbolIndex, nextCharIndex - postSymbolIndex - 1));
r.TokenType |= KParseType::MISSING_QUOTE;
}
r.DequotedNameOrString = aSymbol.makeStringAndClear();
}
}
}
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */