c6c6126aa3
to split it into two constituent parts: SvNFFormatData, which is the data store for number formats it generally operates on, and SvNFLanguageData, for data around the current language in use; and then a SvNFEngine which implements the interaction between those parts. SvNFEngine has two policies: the typical RW mode, and a new RO mode where the SvNFFormatData doesn't change, so all formats needed in this mode must already exist. Change-Id: I56b070ccd2e556a0cb1fe609a2fae28e18277c8c Reviewed-on: https://gerrit.libreoffice.org/c/core/+/165146 Tested-by: Jenkins Reviewed-by: Caolán McNamara <caolan.mcnamara@collabora.com>
444 lines
18 KiB
C++
444 lines
18 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*
|
|
* This file is part of the LibreOffice project.
|
|
*
|
|
* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
|
|
*
|
|
* This file incorporates work covered by the following license notice:
|
|
*
|
|
* Licensed to the Apache Software Foundation (ASF) under one or more
|
|
* contributor license agreements. See the NOTICE file distributed
|
|
* with this work for additional information regarding copyright
|
|
* ownership. The ASF licenses this file to you under the Apache
|
|
* License, Version 2.0 (the "License"); you may not use this file
|
|
* except in compliance with the License. You may obtain a copy of
|
|
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
|
|
*/
|
|
|
|
#ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
|
|
#define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
|
|
|
|
#include <com/sun/star/uno/Sequence.hxx>
|
|
#include <rtl/ustring.hxx>
|
|
#include <svl/zforlist.hxx>
|
|
#include <tools/date.hxx>
|
|
#include <memory>
|
|
#include <optional>
|
|
|
|
class SvNumberformat;
|
|
class SvNFLanguageData;
|
|
enum class SvNumFormatType : sal_Int16;
|
|
|
|
#define SV_MAX_COUNT_INPUT_STRINGS 20 // max count of substrings in input scanner
|
|
|
|
class ImpSvNumberInputScan
|
|
{
|
|
public:
|
|
explicit ImpSvNumberInputScan(SvNFLanguageData& rCurrentLanguage);
|
|
~ImpSvNumberInputScan();
|
|
|
|
/*!*/ void ChangeIntl(); // MUST be called if language changes
|
|
|
|
/// set reference date for offset calculation
|
|
void ChangeNullDate( const sal_uInt16 nDay,
|
|
const sal_uInt16 nMonth,
|
|
const sal_Int16 nYear );
|
|
|
|
/// convert input string to number
|
|
bool IsNumberFormat( const OUString& rString, /// input string
|
|
SvNumFormatType& F_Type, /// format type (in + out)
|
|
double& fOutNumber, /// value determined (out)
|
|
const SvNumberformat* pFormat, /// number format to which compare against
|
|
const NativeNumberWrapper* pNatNum,
|
|
SvNumInputOptions eInputOptions);
|
|
|
|
/// after IsNumberFormat: get decimal position
|
|
short GetDecPos() const { return nDecPos; }
|
|
/// after IsNumberFormat: get count of numeric substrings in input string
|
|
sal_uInt16 GetNumericsCount() const { return nNumericsCnt; }
|
|
|
|
/// set threshold of two-digit year input
|
|
void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }
|
|
/// get threshold of two-digit year input
|
|
sal_uInt16 GetYear2000() const { return nYear2000; }
|
|
|
|
/** Whether input can be forced to ISO 8601 format.
|
|
|
|
Depends on locale's date separator and a specific date format order.
|
|
*/
|
|
bool CanForceToIso8601( DateOrder eDateOrder );
|
|
|
|
void InvalidateDateAcceptancePatterns();
|
|
|
|
/** Whether 'T' separator was detected in an ISO 8601 date+time format.
|
|
*/
|
|
bool HasIso8601Tsep() const { return bIso8601Tsep; }
|
|
|
|
private:
|
|
SvNFLanguageData& mrCurrentLanguageData;
|
|
const SvNumberformat* mpFormat; //* The format to compare against, if any
|
|
std::unique_ptr<OUString[]> pUpperMonthText; //* Array of month names, uppercase
|
|
std::unique_ptr<OUString[]> pUpperAbbrevMonthText; //* Array of month names, abbreviated, uppercase
|
|
std::unique_ptr<OUString[]> pUpperGenitiveMonthText; //* Array of genitive month names, uppercase
|
|
std::unique_ptr<OUString[]> pUpperGenitiveAbbrevMonthText; //* Array of genitive month names, abbreviated, uppercase
|
|
std::unique_ptr<OUString[]> pUpperPartitiveMonthText; //* Array of partitive month names, uppercase
|
|
std::unique_ptr<OUString[]> pUpperPartitiveAbbrevMonthText;//* Array of partitive month names, abbreviated, uppercase
|
|
std::unique_ptr<OUString[]> pUpperDayText; //* Array of day of week names, uppercase
|
|
std::unique_ptr<OUString[]> pUpperAbbrevDayText; //* Array of day of week names, abbreviated, uppercase
|
|
OUString aUpperCurrSymbol; //* Currency symbol, uppercase
|
|
bool bTextInitialized; //* Whether days and months are initialized
|
|
bool bScanGenitiveMonths; //* Whether to scan an input for genitive months
|
|
bool bScanPartitiveMonths; //* Whether to scan an input for partitive months
|
|
std::optional<Date> moNullDate; //* 30Dec1899
|
|
// Variables for provisional results:
|
|
OUString sStrArray[SV_MAX_COUNT_INPUT_STRINGS];//* Array of scanned substrings
|
|
bool IsNum[SV_MAX_COUNT_INPUT_STRINGS]; //* Whether a substring is numeric
|
|
sal_uInt16 nNums[SV_MAX_COUNT_INPUT_STRINGS]; //* Sequence of offsets to numeric strings
|
|
sal_uInt16 nStringsCnt; //* Total count of scanned substrings
|
|
sal_uInt16 nNumericsCnt; //* Count of numeric substrings
|
|
bool bDecSepInDateSeps; //* True <=> DecSep in {.,-,/,DateSep}
|
|
sal_uInt8 nMatchedAllStrings; //* Scan...String() matched all substrings,
|
|
|
|
// bit mask of nMatched... constants
|
|
static const sal_uInt8 nMatchedEndString; // 0x01
|
|
static const sal_uInt8 nMatchedMidString; // 0x02
|
|
static const sal_uInt8 nMatchedStartString; // 0x04
|
|
static const sal_uInt8 nMatchedVirgin; // 0x08
|
|
static const sal_uInt8 nMatchedUsedAsReturn; // 0x10
|
|
|
|
int nSign; // Sign of number
|
|
int nMonth; // Month (1..x) if date
|
|
// negative => short format
|
|
short nMonthPos; // 1 = front, 2 = middle
|
|
// 3 = end
|
|
int nDayOfWeek; // Temporary (!) day of week (1..7,-1..-7) if date
|
|
sal_uInt16 nTimePos; // Index of first time separator (+1)
|
|
short nDecPos; // Index of substring containing "," (+1)
|
|
bool bNegCheck; // '( )' for negative
|
|
short nESign; // Sign of exponent
|
|
short nAmPm; // +1 AM, -1 PM, 0 if none
|
|
short nLogical; // -1 => False, 1 => True
|
|
bool mbEraCE; // Era if date, 0 => BCE, 1 => CE (currently only Gregorian)
|
|
sal_uInt16 nThousand; // Count of group (AKA thousand) separators
|
|
sal_uInt16 nPosThousandString; // Position of concatenated 000,000,000 string
|
|
SvNumFormatType eScannedType; // Scanned type
|
|
SvNumFormatType eSetType; // Preset Type
|
|
|
|
sal_uInt16 nStringScanNumFor; // Fixed strings recognized in
|
|
// pFormat->NumFor[nNumForStringScan]
|
|
short nStringScanSign; // Sign resulting of FixString
|
|
sal_uInt16 nYear2000; // Two-digit threshold
|
|
// Year as 20xx
|
|
// default 18
|
|
// number <= nYear2000 => 20xx
|
|
// number > nYear2000 => 19xx
|
|
|
|
/** State of ISO 8601 detection.
|
|
|
|
0:= don't know yet
|
|
1:= no
|
|
2:= yes, <=2 digits in year
|
|
3:= yes, 3 digits in year
|
|
4:= yes, >=4 digits in year
|
|
|
|
@see MayBeIso8601()
|
|
*/
|
|
sal_uInt8 nMayBeIso8601;
|
|
|
|
/** Whether the 'T' time separator was detected in an ISO 8601 string. */
|
|
bool bIso8601Tsep;
|
|
|
|
/** State of dd-month-yy or yy-month-dd detection, with month name.
|
|
|
|
0:= don't know yet
|
|
1:= no
|
|
2:= yes, dd-month-yy
|
|
3:= yes, yy-month-dd
|
|
|
|
@see MayBeMonthDate()
|
|
*/
|
|
sal_uInt8 nMayBeMonthDate;
|
|
|
|
/** Input matched this locale dependent date acceptance pattern.
|
|
-2 if not checked yet, -1 if no match, >=0 matched pattern.
|
|
|
|
@see IsAcceptedDatePattern()
|
|
*/
|
|
sal_Int32 nAcceptedDatePattern;
|
|
css::uno::Sequence< OUString > sDateAcceptancePatterns;
|
|
|
|
/** If input matched a date acceptance pattern that starts at input
|
|
particle sStrArray[nDatePatternStart].
|
|
|
|
@see IsAcceptedDatePattern()
|
|
*/
|
|
sal_uInt16 nDatePatternStart;
|
|
|
|
/** Count of numbers that matched the accepted pattern, if any, else 0.
|
|
|
|
@see GetDatePatternNumbers()
|
|
*/
|
|
sal_uInt16 nDatePatternNumbers;
|
|
|
|
// Copy assignment is forbidden and not implemented.
|
|
ImpSvNumberInputScan (const ImpSvNumberInputScan &) = delete;
|
|
ImpSvNumberInputScan & operator= (const ImpSvNumberInputScan &) = delete;
|
|
|
|
void Reset(); // Reset all variables before start of analysis
|
|
|
|
void InitText(); // Init of months and days of week
|
|
|
|
// Convert string to double.
|
|
// Only simple unsigned floating point values without any error detection,
|
|
// decimal separator has to be '.'
|
|
// If bForceFraction==true the string is taken to be the fractional part
|
|
// of 0.1234 without the leading 0. (thus being just "1234").
|
|
static double StringToDouble( std::u16string_view aStr,
|
|
bool bForceFraction = false );
|
|
|
|
// Next number/string symbol
|
|
static bool NextNumberStringSymbol( const sal_Unicode*& pStr,
|
|
OUString& rSymbol );
|
|
|
|
// Concatenate ,000,23 blocks
|
|
// in input to 000123
|
|
bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol ) const;
|
|
|
|
// Divide numbers/strings into
|
|
// arrays and variables above.
|
|
// Leading blanks and blanks
|
|
// after numbers are thrown away
|
|
void NumberStringDivision( const OUString& rString );
|
|
|
|
|
|
/** Whether rString contains word (!) rWhat at nPos.
|
|
rWhat will not be matched if it is a substring of a word.
|
|
*/
|
|
bool StringContainsWord( const OUString& rWhat,
|
|
const OUString& rString,
|
|
sal_Int32 nPos ) const;
|
|
|
|
// optimized substring versions
|
|
|
|
// Whether rString contains rWhat at nPos
|
|
static bool StringContains( const OUString& rWhat,
|
|
const OUString& rString,
|
|
sal_Int32 nPos )
|
|
{
|
|
if (rWhat.isEmpty() || rString.getLength() <= nPos)
|
|
{
|
|
return false;
|
|
}
|
|
// mostly used with one character
|
|
if ( rWhat[ 0 ] != rString[ nPos ] )
|
|
{
|
|
return false;
|
|
}
|
|
return StringContainsImpl( rWhat, rString, nPos );
|
|
}
|
|
|
|
// Whether pString contains rWhat at nPos
|
|
static bool StringPtrContains( const OUString& rWhat,
|
|
const sal_Unicode* pString,
|
|
sal_Int32 nPos ) // nPos MUST be a valid offset from pString
|
|
{
|
|
// mostly used with one character
|
|
if ( rWhat[ 0 ] != pString[ nPos ] )
|
|
{
|
|
return false;
|
|
}
|
|
return StringPtrContainsImpl( rWhat, pString, nPos );
|
|
}
|
|
|
|
//! DO NOT use directly
|
|
static bool StringContainsImpl( const OUString& rWhat,
|
|
const OUString& rString,
|
|
sal_Int32 nPos );
|
|
//! DO NOT use directly
|
|
static bool StringPtrContainsImpl( const OUString& rWhat,
|
|
const sal_Unicode* pString,
|
|
sal_Int32 nPos );
|
|
|
|
// Skip a special character
|
|
static inline bool SkipChar( sal_Unicode c,
|
|
std::u16string_view rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Skip blank
|
|
static inline bool SkipBlanks( const OUString& rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Jump over rWhat in rString at nPos
|
|
static inline bool SkipString( const OUString& rWhat,
|
|
const OUString& rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Recognizes exactly ,111 as group separator
|
|
inline bool GetThousandSep( std::u16string_view rString,
|
|
sal_Int32& nPos,
|
|
sal_uInt16 nStringPos ) const;
|
|
// Get boolean value
|
|
short GetLogical( std::u16string_view rString ) const;
|
|
|
|
// Get month and advance string position
|
|
short GetMonth( const OUString& rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Get day of week and advance string position
|
|
int GetDayOfWeek( const OUString& rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Get currency symbol and advance string position
|
|
bool GetCurrency( const OUString& rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Get symbol AM or PM and advance string position
|
|
bool GetTimeAmPm( const OUString& rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Get decimal separator and advance string position
|
|
inline bool GetDecSep( std::u16string_view rString,
|
|
sal_Int32& nPos ) const;
|
|
|
|
// Get hundredth seconds separator and advance string position
|
|
inline bool GetTime100SecSep( std::u16string_view rString,
|
|
sal_Int32& nPos ) const;
|
|
|
|
// Get sign and advance string position
|
|
// Including special case '('
|
|
int GetSign( std::u16string_view rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Get sign of exponent and advance string position
|
|
static short GetESign( std::u16string_view rString,
|
|
sal_Int32& nPos );
|
|
|
|
// Get next number as array offset
|
|
inline bool GetNextNumber( sal_uInt16& i,
|
|
sal_uInt16& j ) const;
|
|
|
|
/** Converts time -> double (only decimals)
|
|
|
|
@return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
|
|
*/
|
|
bool GetTimeRef( double& fOutNumber, // result as double
|
|
sal_uInt16 nIndex, // Index of hour in input
|
|
sal_uInt16 nCnt, // Count of time substrings in input
|
|
SvNumInputOptions eInputOptions ) const;
|
|
sal_uInt16 ImplGetDay ( sal_uInt16 nIndex ) const; // Day input, 0 if no match
|
|
sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ) const; // Month input, zero based return, NumberOfMonths if no match
|
|
sal_uInt16 ImplGetYear ( sal_uInt16 nIndex ); // Year input, 0 if no match
|
|
|
|
// Conversion of date to number
|
|
bool GetDateRef( double& fDays, // OUT: days diff to null date
|
|
sal_uInt16& nCounter ); // Count of date substrings
|
|
|
|
// Analyze start of string
|
|
bool ScanStartString( const OUString& rString );
|
|
|
|
// Analyze middle substring
|
|
bool ScanMidString( const OUString& rString,
|
|
sal_uInt16 nStringPos,
|
|
sal_uInt16 nCurNumCount );
|
|
|
|
|
|
// Analyze end of string
|
|
bool ScanEndString( const OUString& rString );
|
|
|
|
// Compare rString to substring of array indexed by nString
|
|
// nString == 0xFFFF => last substring
|
|
bool ScanStringNumFor( const OUString& rString,
|
|
sal_Int32 nPos,
|
|
sal_uInt16 nString,
|
|
bool bDontDetectNegation = false );
|
|
|
|
// if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
|
|
// else do nothing and return false
|
|
bool MatchedReturn();
|
|
|
|
//! Be sure that the string to be analyzed is already converted to upper
|
|
//! case and if it contained native number digits that they are already
|
|
//! converted to ASCII.
|
|
|
|
// Main analyzing function
|
|
bool IsNumberFormatMain( const OUString& rString,
|
|
const SvNumberformat* pFormat); // number format to match against
|
|
|
|
/** Whether input matches locale dependent date acceptance pattern.
|
|
|
|
@param nStartPatternAt
|
|
The pattern matching starts at input particle
|
|
sStrArray[nStartPatternAt].
|
|
|
|
NOTE: once called the result is remembered, subsequent calls with
|
|
different parameters do not check for a match and do not lead to a
|
|
different result.
|
|
*/
|
|
bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt );
|
|
|
|
/** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
|
|
matches separator in pattern at nParticle.
|
|
|
|
Also detects a signed year case like M/D/-Y
|
|
|
|
@returns TRUE if separator matched.
|
|
*/
|
|
bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos, bool & rSignedYear );
|
|
|
|
/** Returns count of numbers in accepted date pattern.
|
|
*/
|
|
sal_uInt16 GetDatePatternNumbers();
|
|
|
|
/** Whether numeric string nNumber is of type cType in accepted date
|
|
pattern, 'Y', 'M' or 'D'.
|
|
*/
|
|
bool IsDatePatternNumberOfType( sal_uInt16 nNumber, sal_Unicode cType );
|
|
|
|
/** Obtain order of accepted date pattern coded as, for example,
|
|
('D'<<16)|('M'<<8)|'Y'
|
|
*/
|
|
sal_uInt32 GetDatePatternOrder();
|
|
|
|
/** Obtain date format order, from accepted date pattern if available or
|
|
otherwise the locale's default order.
|
|
|
|
@param bFromFormatIfNoPattern
|
|
If <TRUE/> and no pattern was matched, obtain date order from
|
|
format if available, instead from format's or current locale.
|
|
*/
|
|
DateOrder GetDateOrder( bool bFromFormatIfNoPattern = false );
|
|
|
|
/** Whether input may be an ISO 8601 date format, yyyy-mm-dd...
|
|
|
|
Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
|
|
is '-', and 1<=mm<=12 and 1<=dd<=31.
|
|
|
|
@see nMayBeIso8601
|
|
*/
|
|
bool MayBeIso8601();
|
|
|
|
/** Whether input may be a dd-month-yy format, with month name, not
|
|
number.
|
|
|
|
@see nMayBeMonthDate
|
|
*/
|
|
bool MayBeMonthDate();
|
|
|
|
/** Whether input is acceptable as ISO 8601 date format in the current
|
|
NfEvalDateFormat setting.
|
|
*/
|
|
bool IsAcceptableIso8601();
|
|
|
|
/** If month name in the middle was parsed, get the corresponding
|
|
LongDateOrder in GetDateRef().
|
|
*/
|
|
LongDateOrder GetMiddleMonthLongDateOrder( bool bFormatTurn,
|
|
const LocaleDataWrapper* pLoc,
|
|
DateOrder eDateOrder );
|
|
};
|
|
|
|
#endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|