office-gobmx/svl/source/numbers/zforfind.hxx
Caolán McNamara c6c6126aa3 Related: tdf#160056 refactor SvNumberFormatter
to split it into two constituent parts

SvNFFormatData which is the data store for number formats it generally
operates on.

SvNFLanguageData for data around the current language in use.

and then a SvNFEngine which implements the interaction between those
parts

SvNFEngine has two policies, the typical RW mode and a new RO mode where
the SvNFFormatData doesn't change, all formats needed in this mode must
already exist.

Change-Id: I56b070ccd2e556a0cb1fe609a2fae28e18277c8c
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/165146
Tested-by: Jenkins
Reviewed-by: Caolán McNamara <caolan.mcnamara@collabora.com>
2024-03-22 21:50:59 +01:00

444 lines
18 KiB
C++

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#ifndef INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
#define INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
#include <com/sun/star/uno/Sequence.hxx>
#include <rtl/ustring.hxx>
#include <svl/zforlist.hxx>
#include <tools/date.hxx>
#include <memory>
#include <optional>
// Forward declarations for types this header only uses by pointer or
// reference (SvNumberformat, SvNFLanguageData), plus an opaque declaration of
// the SvNumFormatType enum with its fixed underlying type.
class SvNumberformat;
class SvNFLanguageData;
enum class SvNumFormatType : sal_Int16;
// Capacity of the fixed-size per-input arrays in ImpSvNumberInputScan
// (sStrArray, IsNum, nNums).
#define SV_MAX_COUNT_INPUT_STRINGS 20 // max count of substrings in input scanner
/** Input scanner used to recognize numbers in text input.

    IsNumberFormat() is the public entry point: it takes an input string and
    reports the detected format type and the resulting value. Afterwards the
    accessors GetDecPos(), GetNumericsCount() and HasIso8601Tsep() expose
    details of that scan.

    The constructor takes the SvNFLanguageData the scanner works with;
    ChangeIntl() MUST be called if the language changes. Instances are not
    copyable.
 */
class ImpSvNumberInputScan
{
public:
    explicit ImpSvNumberInputScan(SvNFLanguageData& rCurrentLanguage);
    ~ImpSvNumberInputScan();

    /*!*/ void ChangeIntl();                    // MUST be called if language changes

    /// set reference date for offset calculation
    void ChangeNullDate( const sal_uInt16 nDay,
                         const sal_uInt16 nMonth,
                         const sal_Int16 nYear );

    /// convert input string to number
    bool IsNumberFormat( const OUString& rString,           /// input string
                         SvNumFormatType& F_Type,           /// format type (in + out)
                         double& fOutNumber,                /// value determined (out)
                         const SvNumberformat* pFormat,     /// number format to which compare against
                         const NativeNumberWrapper* pNatNum,
                         SvNumInputOptions eInputOptions);

    /// after IsNumberFormat: get decimal position
    short GetDecPos() const { return nDecPos; }

    /// after IsNumberFormat: get count of numeric substrings in input string
    sal_uInt16 GetNumericsCount() const { return nNumericsCnt; }

    /// set threshold of two-digit year input
    void SetYear2000( sal_uInt16 nVal ) { nYear2000 = nVal; }

    /// get threshold of two-digit year input
    sal_uInt16 GetYear2000() const { return nYear2000; }

    /** Whether input can be forced to ISO 8601 format.

        Depends on locale's date separator and a specific date format order.
     */
    bool CanForceToIso8601( DateOrder eDateOrder );

    void InvalidateDateAcceptancePatterns();

    /** Whether 'T' separator was detected in an ISO 8601 date+time format.
     */
    bool HasIso8601Tsep() const { return bIso8601Tsep; }

private:
    SvNFLanguageData& mrCurrentLanguageData;
    const SvNumberformat* mpFormat;                             //* The format to compare against, if any
    std::unique_ptr<OUString[]> pUpperMonthText;                //* Array of month names, uppercase
    std::unique_ptr<OUString[]> pUpperAbbrevMonthText;          //* Array of month names, abbreviated, uppercase
    std::unique_ptr<OUString[]> pUpperGenitiveMonthText;        //* Array of genitive month names, uppercase
    std::unique_ptr<OUString[]> pUpperGenitiveAbbrevMonthText;  //* Array of genitive month names, abbreviated, uppercase
    std::unique_ptr<OUString[]> pUpperPartitiveMonthText;       //* Array of partitive month names, uppercase
    std::unique_ptr<OUString[]> pUpperPartitiveAbbrevMonthText; //* Array of partitive month names, abbreviated, uppercase
    std::unique_ptr<OUString[]> pUpperDayText;                  //* Array of day of week names, uppercase
    std::unique_ptr<OUString[]> pUpperAbbrevDayText;            //* Array of day of week names, abbreviated, uppercase
    OUString aUpperCurrSymbol;                                  //* Currency symbol, uppercase
    bool bTextInitialized;                                      //* Whether days and months are initialized
    bool bScanGenitiveMonths;                                   //* Whether to scan an input for genitive months
    bool bScanPartitiveMonths;                                  //* Whether to scan an input for partitive months
    std::optional<Date> moNullDate;                             //* 30Dec1899

    // Variables for provisional results:
    OUString sStrArray[SV_MAX_COUNT_INPUT_STRINGS];             //* Array of scanned substrings
    bool IsNum[SV_MAX_COUNT_INPUT_STRINGS];                     //* Whether a substring is numeric
    sal_uInt16 nNums[SV_MAX_COUNT_INPUT_STRINGS];               //* Sequence of offsets to numeric strings
    sal_uInt16 nStringsCnt;                                     //* Total count of scanned substrings
    sal_uInt16 nNumericsCnt;                                    //* Count of numeric substrings
    bool bDecSepInDateSeps;                                     //* True <=> DecSep in {.,-,/,DateSep}
    sal_uInt8 nMatchedAllStrings;                               //* Scan...String() matched all substrings,
                                                                // bit mask of nMatched... constants

    // Bit values for nMatchedAllStrings.
    static const sal_uInt8 nMatchedEndString;                   // 0x01
    static const sal_uInt8 nMatchedMidString;                   // 0x02
    static const sal_uInt8 nMatchedStartString;                 // 0x04
    static const sal_uInt8 nMatchedVirgin;                      // 0x08
    static const sal_uInt8 nMatchedUsedAsReturn;                // 0x10

    int nSign;                                  // Sign of number
    int nMonth;                                 // Month (1..x) if date
                                                // negative => short format
    short nMonthPos;                            // 1 = front, 2 = middle
                                                // 3 = end
    int nDayOfWeek;                             // Temporary (!) day of week (1..7,-1..-7) if date
    sal_uInt16 nTimePos;                        // Index of first time separator (+1)
    short nDecPos;                              // Index of substring containing "," (+1)
    bool bNegCheck;                             // '( )' for negative
    short nESign;                               // Sign of exponent
    short nAmPm;                                // +1 AM, -1 PM, 0 if none
    short nLogical;                             // -1 => False, 1 => True
    bool mbEraCE;                               // Era if date, 0 => BCE, 1 => CE (currently only Gregorian)
    sal_uInt16 nThousand;                       // Count of group (AKA thousand) separators
    sal_uInt16 nPosThousandString;              // Position of concatenated 000,000,000 string
    SvNumFormatType eScannedType;               // Scanned type
    SvNumFormatType eSetType;                   // Preset Type

    sal_uInt16 nStringScanNumFor;               // Fixed strings recognized in
                                                // pFormat->NumFor[nStringScanNumFor]
    short nStringScanSign;                      // Sign resulting of FixString
    sal_uInt16 nYear2000;                       // Two-digit threshold
                                                // Year as 20xx
                                                // default 18
                                                // number <= nYear2000 => 20xx
                                                // number > nYear2000 => 19xx

    /** State of ISO 8601 detection.

        0:= don't know yet
        1:= no
        2:= yes, <=2 digits in year
        3:= yes, 3 digits in year
        4:= yes, >=4 digits in year

        @see MayBeIso8601()
     */
    sal_uInt8 nMayBeIso8601;

    /** Whether the 'T' time separator was detected in an ISO 8601 string. */
    bool bIso8601Tsep;

    /** State of dd-month-yy or yy-month-dd detection, with month name.

        0:= don't know yet
        1:= no
        2:= yes, dd-month-yy
        3:= yes, yy-month-dd

        @see MayBeMonthDate()
     */
    sal_uInt8 nMayBeMonthDate;

    /** Input matched this locale dependent date acceptance pattern.

        -2 if not checked yet, -1 if no match, >=0 matched pattern.

        @see IsAcceptedDatePattern()
     */
    sal_Int32 nAcceptedDatePattern;
    css::uno::Sequence< OUString > sDateAcceptancePatterns;

    /** If input matched a date acceptance pattern that starts at input
        particle sStrArray[nDatePatternStart].

        @see IsAcceptedDatePattern()
     */
    sal_uInt16 nDatePatternStart;

    /** Count of numbers that matched the accepted pattern, if any, else 0.

        @see GetDatePatternNumbers()
     */
    sal_uInt16 nDatePatternNumbers;

    // Copying is forbidden: copy constructor and copy assignment are deleted.
    ImpSvNumberInputScan (const ImpSvNumberInputScan &) = delete;
    ImpSvNumberInputScan & operator= (const ImpSvNumberInputScan &) = delete;

    void Reset();                               // Reset all variables before start of analysis

    void InitText();                            // Init of months and days of week

    // Convert string to double.
    // Only simple unsigned floating point values without any error detection,
    // decimal separator has to be '.'
    // If bForceFraction==true the string is taken to be the fractional part
    // of 0.1234 without the leading 0. (thus being just "1234").
    static double StringToDouble( std::u16string_view aStr,
                                  bool bForceFraction = false );

    // Next number/string symbol
    static bool NextNumberStringSymbol( const sal_Unicode*& pStr,
                                        OUString& rSymbol );

    // Concatenate ,000,23 blocks
    // in input to 000123
    bool SkipThousands( const sal_Unicode*& pStr, OUString& rSymbol ) const;

    // Divide numbers/strings into
    // arrays and variables above.
    // Leading blanks and blanks
    // after numbers are thrown away
    void NumberStringDivision( const OUString& rString );

    /** Whether rString contains word (!) rWhat at nPos.
        rWhat will not be matched if it is a substring of a word.
     */
    bool StringContainsWord( const OUString& rWhat,
                             const OUString& rString,
                             sal_Int32 nPos ) const;

    // optimized substring versions

    /** Whether rString contains rWhat at nPos.

        Returns false if rWhat is empty or if nPos is not a valid index into
        rString (negative, or at/after the end). Compares the first character
        inline before delegating the full comparison to StringContainsImpl().
     */
    static bool StringContains( const OUString& rWhat,
                                const OUString& rString,
                                sal_Int32 nPos )
    {
        // Reject a negative nPos as well, it must never reach rString[nPos].
        if (rWhat.isEmpty() || nPos < 0 || rString.getLength() <= nPos)
        {
            return false;
        }
        // mostly used with one character
        if ( rWhat[ 0 ] != rString[ nPos ] )
        {
            return false;
        }
        return StringContainsImpl( rWhat, rString, nPos );
    }

    /** Whether pString contains rWhat at nPos.

        nPos MUST be a valid offset from pString; that cannot be checked here
        as no length is passed. Returns false if rWhat is empty, like
        StringContains() does, so that rWhat[0] is never indexed out of range.
     */
    static bool StringPtrContains( const OUString& rWhat,
                                   const sal_Unicode* pString,
                                   sal_Int32 nPos ) // nPos MUST be a valid offset from pString
    {
        // Nothing to match; also guards the rWhat[0] access below.
        if ( rWhat.isEmpty() )
        {
            return false;
        }
        // mostly used with one character
        if ( rWhat[ 0 ] != pString[ nPos ] )
        {
            return false;
        }
        return StringPtrContainsImpl( rWhat, pString, nPos );
    }

    //! DO NOT use directly
    static bool StringContainsImpl( const OUString& rWhat,
                                    const OUString& rString,
                                    sal_Int32 nPos );

    //! DO NOT use directly
    static bool StringPtrContainsImpl( const OUString& rWhat,
                                       const sal_Unicode* pString,
                                       sal_Int32 nPos );

    // Skip a special character
    static inline bool SkipChar( sal_Unicode c,
                                 std::u16string_view rString,
                                 sal_Int32& nPos );

    // Skip blank
    static inline bool SkipBlanks( const OUString& rString,
                                   sal_Int32& nPos );

    // Jump over rWhat in rString at nPos
    static inline bool SkipString( const OUString& rWhat,
                                   const OUString& rString,
                                   sal_Int32& nPos );

    // Recognizes exactly ,111 as group separator
    inline bool GetThousandSep( std::u16string_view rString,
                                sal_Int32& nPos,
                                sal_uInt16 nStringPos ) const;

    // Get boolean value
    short GetLogical( std::u16string_view rString ) const;

    // Get month and advance string position
    short GetMonth( const OUString& rString,
                    sal_Int32& nPos );

    // Get day of week and advance string position
    int GetDayOfWeek( const OUString& rString,
                      sal_Int32& nPos );

    // Get currency symbol and advance string position
    bool GetCurrency( const OUString& rString,
                      sal_Int32& nPos );

    // Get symbol AM or PM and advance string position
    bool GetTimeAmPm( const OUString& rString,
                      sal_Int32& nPos );

    // Get decimal separator and advance string position
    inline bool GetDecSep( std::u16string_view rString,
                           sal_Int32& nPos ) const;

    // Get hundredth seconds separator and advance string position
    inline bool GetTime100SecSep( std::u16string_view rString,
                                  sal_Int32& nPos ) const;

    // Get sign and advance string position
    // Including special case '('
    int GetSign( std::u16string_view rString,
                 sal_Int32& nPos );

    // Get sign of exponent and advance string position
    static short GetESign( std::u16string_view rString,
                           sal_Int32& nPos );

    // Get next number as array offset
    inline bool GetNextNumber( sal_uInt16& i,
                               sal_uInt16& j ) const;

    /** Converts time -> double (only decimals)

        @return TRUE if time, FALSE if not (e.g. hours >12 with AM/PM)
     */
    bool GetTimeRef( double& fOutNumber,                // result as double
                     sal_uInt16 nIndex,                 // Index of hour in input
                     sal_uInt16 nCnt,                   // Count of time substrings in input
                     SvNumInputOptions eInputOptions ) const;

    sal_uInt16 ImplGetDay  ( sal_uInt16 nIndex ) const; // Day input, 0 if no match
    sal_uInt16 ImplGetMonth( sal_uInt16 nIndex ) const; // Month input, zero based return, NumberOfMonths if no match
    sal_uInt16 ImplGetYear ( sal_uInt16 nIndex );       // Year input, 0 if no match

    // Conversion of date to number
    bool GetDateRef( double& fDays,                     // OUT: days diff to null date
                     sal_uInt16& nCounter );            // Count of date substrings

    // Analyze start of string
    bool ScanStartString( const OUString& rString );

    // Analyze middle substring
    bool ScanMidString( const OUString& rString,
                        sal_uInt16 nStringPos,
                        sal_uInt16 nCurNumCount );

    // Analyze end of string
    bool ScanEndString( const OUString& rString );

    // Compare rString to substring of array indexed by nString
    // nString == 0xFFFF => last substring
    bool ScanStringNumFor( const OUString& rString,
                           sal_Int32 nPos,
                           sal_uInt16 nString,
                           bool bDontDetectNegation = false );

    // if nMatchedAllStrings set nMatchedUsedAsReturn and return true,
    // else do nothing and return false
    bool MatchedReturn();

    //! Be sure that the string to be analyzed is already converted to upper
    //! case and if it contained native number digits that they are already
    //! converted to ASCII.

    // Main analyzing function
    bool IsNumberFormatMain( const OUString& rString,
                             const SvNumberformat* pFormat);    // number format to match against

    /** Whether input matches locale dependent date acceptance pattern.

        @param nStartPatternAt
               The pattern matching starts at input particle
               sStrArray[nStartPatternAt].

        NOTE: once called the result is remembered, subsequent calls with
        different parameters do not check for a match and do not lead to a
        different result.
     */
    bool IsAcceptedDatePattern( sal_uInt16 nStartPatternAt );

    /** Sets (not advances!) rPos to sStrArray[nParticle].getLength() if string
        matches separator in pattern at nParticle.

        Also detects a signed year case like M/D/-Y

        @returns TRUE if separator matched.
     */
    bool SkipDatePatternSeparator( sal_uInt16 nParticle, sal_Int32 & rPos, bool & rSignedYear );

    /** Returns count of numbers in accepted date pattern.
     */
    sal_uInt16 GetDatePatternNumbers();

    /** Whether numeric string nNumber is of type cType in accepted date
        pattern, 'Y', 'M' or 'D'.
     */
    bool IsDatePatternNumberOfType( sal_uInt16 nNumber, sal_Unicode cType );

    /** Obtain order of accepted date pattern coded as, for example,
        ('D'<<16)|('M'<<8)|'Y'
    */
    sal_uInt32 GetDatePatternOrder();

    /** Obtain date format order, from accepted date pattern if available or
        otherwise the locale's default order.

        @param bFromFormatIfNoPattern
               If <TRUE/> and no pattern was matched, obtain date order from
               format if available, instead of from the format's or current
               locale.
     */
    DateOrder GetDateOrder( bool bFromFormatIfNoPattern = false );

    /** Whether input may be an ISO 8601 date format, yyyy-mm-dd...

        Checks if input has at least 3 numbers for yyyy-mm-dd and the separator
        is '-', and 1<=mm<=12 and 1<=dd<=31.

        @see nMayBeIso8601
     */
    bool MayBeIso8601();

    /** Whether input may be a dd-month-yy format, with month name, not
        number.

        @see nMayBeMonthDate
     */
    bool MayBeMonthDate();

    /** Whether input is acceptable as ISO 8601 date format in the current
        NfEvalDateFormat setting.
     */
    bool IsAcceptableIso8601();

    /** If month name in the middle was parsed, get the corresponding
        LongDateOrder in GetDateRef().
     */
    LongDateOrder GetMiddleMonthLongDateOrder( bool bFormatTurn,
                                               const LocaleDataWrapper* pLoc,
                                               DateOrder eDateOrder );
};
#endif // INCLUDED_SVL_SOURCE_NUMBERS_ZFORFIND_HXX
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */