tdf#160478: fix Basic LIKE operator
1. The regex must match the whole input. Thus, the ^ and $ metacharacters must be replaced with \A and \z, which only match the beginning and end of input, not any line start / end. 2. The * and ? metacharacters of LIKE must match the newline character; thus, the search flags must include UREGEX_DOTALL. To avoid changing the TextSearch implementation, I use icu::RegexMatcher directly. The direct use of icu::RegexMatcher also made it possible to simplify the code by calling the icu::RegexMatcher::matches method. This may perform better than the general-purpose utl::TextSearch::SearchForward. Change-Id: I75776498b36f236da294462362ed5b36ed8cdf68 Reviewed-on: https://gerrit.libreoffice.org/c/core/+/165700 Tested-by: Jenkins Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
This commit is contained in:
parent
0fb98963c4
commit
38f731ff67
3 changed files with 41 additions and 16 deletions
|
@ -11,6 +11,12 @@ $(eval $(call gb_CppunitTest_CppunitTest,basic_scanner))
|
|||
|
||||
$(eval $(call gb_CppunitTest_use_ure,basic_scanner))
|
||||
|
||||
$(eval $(call gb_CppunitTest_use_externals,basic_scanner,\
|
||||
icu_headers \
|
||||
icuuc \
|
||||
icui18n \
|
||||
))
|
||||
|
||||
$(eval $(call gb_CppunitTest_add_exception_objects,basic_scanner, \
|
||||
basic/qa/cppunit/test_scanner \
|
||||
))
|
||||
|
|
|
@ -25,6 +25,12 @@ $(eval $(call gb_Library_set_include,sb,\
|
|||
-I$(SRCDIR)/basic/source/inc \
|
||||
))
|
||||
|
||||
$(eval $(call gb_Library_use_externals,sb,\
|
||||
icu_headers \
|
||||
icuuc \
|
||||
icui18n \
|
||||
))
|
||||
|
||||
$(eval $(call gb_Library_set_precompiled_header,sb,basic/inc/pch/precompiled_sb))
|
||||
|
||||
$(eval $(call gb_Library_use_custom_headers,sb,\
|
||||
|
|
|
@ -49,9 +49,7 @@
|
|||
#include <svl/numformat.hxx>
|
||||
#include <svl/zforlist.hxx>
|
||||
|
||||
#include <i18nutil/searchopt.hxx>
|
||||
#include <unotools/syslocale.hxx>
|
||||
#include <unotools/textsearch.hxx>
|
||||
#include <unicode/regex.h>
|
||||
|
||||
#include <basic/sbuno.hxx>
|
||||
|
||||
|
@ -1470,7 +1468,7 @@ namespace
|
|||
|
||||
int seenright = 0;
|
||||
|
||||
sResult.append('^');
|
||||
sResult.append("\\A"); // Match at the beginning of the input
|
||||
|
||||
while (start < end)
|
||||
{
|
||||
|
@ -1534,7 +1532,7 @@ namespace
|
|||
}
|
||||
}
|
||||
|
||||
sResult.append('$');
|
||||
sResult.append("\\z"); // Match if the current position is at the end of input
|
||||
|
||||
return sResult.makeStringAndClear();
|
||||
}
|
||||
|
@ -1546,13 +1544,7 @@ void SbiRuntime::StepLIKE()
|
|||
SbxVariableRef refVar2 = PopVar();
|
||||
|
||||
OUString value = refVar2->GetOUString();
|
||||
|
||||
i18nutil::SearchOptions2 aSearchOpt;
|
||||
|
||||
aSearchOpt.AlgorithmType2 = css::util::SearchAlgorithms2::REGEXP;
|
||||
|
||||
aSearchOpt.Locale = Application::GetSettings().GetLanguageTag().getLocale();
|
||||
aSearchOpt.searchString = VBALikeToRegexp(refVar1->GetOUString());
|
||||
OUString regex = VBALikeToRegexp(refVar1->GetOUString());
|
||||
|
||||
bool bTextMode(true);
|
||||
bool bCompatibility = ( GetSbData()->pInst && GetSbData()->pInst->IsCompatibility() );
|
||||
|
@ -1560,14 +1552,35 @@ void SbiRuntime::StepLIKE()
|
|||
{
|
||||
bTextMode = IsImageFlag( SbiImageFlags::COMPARETEXT );
|
||||
}
|
||||
sal_uInt32 searchFlags = UREGEX_UWORD | UREGEX_DOTALL; // Dot matches newline
|
||||
if( bTextMode )
|
||||
{
|
||||
aSearchOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE;
|
||||
searchFlags |= UREGEX_CASE_INSENSITIVE;
|
||||
}
|
||||
|
||||
static sal_uInt32 cachedSearchFlags = 0;
|
||||
static OUString cachedRegex;
|
||||
static std::optional<icu::RegexMatcher> oRegexMatcher;
|
||||
UErrorCode nIcuErr = U_ZERO_ERROR;
|
||||
if (regex != cachedRegex || searchFlags != cachedSearchFlags || !oRegexMatcher)
|
||||
{
|
||||
cachedRegex = regex;
|
||||
cachedSearchFlags = searchFlags;
|
||||
icu::UnicodeString sRegex(false, reinterpret_cast<const UChar*>(cachedRegex.getStr()),
|
||||
cachedRegex.getLength());
|
||||
oRegexMatcher.emplace(sRegex, cachedSearchFlags, nIcuErr);
|
||||
}
|
||||
|
||||
icu::UnicodeString sSource(false, reinterpret_cast<const UChar*>(value.getStr()),
|
||||
value.getLength());
|
||||
oRegexMatcher->reset(sSource);
|
||||
|
||||
bool bRes = oRegexMatcher->matches(nIcuErr);
|
||||
if (nIcuErr)
|
||||
{
|
||||
Error(ERRCODE_BASIC_INTERNAL_ERROR);
|
||||
}
|
||||
SbxVariable* pRes = new SbxVariable;
|
||||
utl::TextSearch aSearch( aSearchOpt);
|
||||
sal_Int32 nStart=0, nEnd=value.getLength();
|
||||
bool bRes = aSearch.SearchForward(value, &nStart, &nEnd);
|
||||
pRes->PutBool( bRes );
|
||||
|
||||
PushVar( pRes );
|
||||
|
|
Loading…
Reference in a new issue