tdf#160478: fix Basic LIKE operator

1. The regex must match the whole input. Thus, the ^ and $ metacharacters
   must be replaced with \A and \z, which only match beginning and end of
   input, not any line start / end.
2. The * and ? metacharacters of LIKE must match the newline character; thus,
   the search flags must include UREGEX_DOTALL. To avoid changing the TextSearch
   implementation, I use icu::RegexMatcher directly.

The direct use of icu::RegexMatcher also made it possible to simplify the code
by calling the icu::RegexMatcher::matches method. This may perform better than
the general-purpose utl::TextSearch::SearchForward.

Change-Id: I75776498b36f236da294462362ed5b36ed8cdf68
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/165700
Tested-by: Jenkins
Reviewed-by: Mike Kaganski <mike.kaganski@collabora.com>
This commit is contained in:
Mike Kaganski 2024-04-02 20:34:00 +05:00
parent 0fb98963c4
commit 38f731ff67
3 changed files with 41 additions and 16 deletions

View file

@ -11,6 +11,12 @@ $(eval $(call gb_CppunitTest_CppunitTest,basic_scanner))
$(eval $(call gb_CppunitTest_use_ure,basic_scanner))
$(eval $(call gb_CppunitTest_use_externals,basic_scanner,\
icu_headers \
icuuc \
icui18n \
))
$(eval $(call gb_CppunitTest_add_exception_objects,basic_scanner, \
basic/qa/cppunit/test_scanner \
))

View file

@ -25,6 +25,12 @@ $(eval $(call gb_Library_set_include,sb,\
-I$(SRCDIR)/basic/source/inc \
))
$(eval $(call gb_Library_use_externals,sb,\
icu_headers \
icuuc \
icui18n \
))
$(eval $(call gb_Library_set_precompiled_header,sb,basic/inc/pch/precompiled_sb))
$(eval $(call gb_Library_use_custom_headers,sb,\

View file

@ -49,9 +49,7 @@
#include <svl/numformat.hxx>
#include <svl/zforlist.hxx>
#include <i18nutil/searchopt.hxx>
#include <unotools/syslocale.hxx>
#include <unotools/textsearch.hxx>
#include <unicode/regex.h>
#include <basic/sbuno.hxx>
@ -1470,7 +1468,7 @@ namespace
int seenright = 0;
sResult.append('^');
sResult.append("\\A"); // Match at the beginning of the input
while (start < end)
{
@ -1534,7 +1532,7 @@ namespace
}
}
sResult.append('$');
sResult.append("\\z"); // Match if the current position is at the end of input
return sResult.makeStringAndClear();
}
@ -1546,13 +1544,7 @@ void SbiRuntime::StepLIKE()
SbxVariableRef refVar2 = PopVar();
OUString value = refVar2->GetOUString();
i18nutil::SearchOptions2 aSearchOpt;
aSearchOpt.AlgorithmType2 = css::util::SearchAlgorithms2::REGEXP;
aSearchOpt.Locale = Application::GetSettings().GetLanguageTag().getLocale();
aSearchOpt.searchString = VBALikeToRegexp(refVar1->GetOUString());
OUString regex = VBALikeToRegexp(refVar1->GetOUString());
bool bTextMode(true);
bool bCompatibility = ( GetSbData()->pInst && GetSbData()->pInst->IsCompatibility() );
@ -1560,14 +1552,35 @@ void SbiRuntime::StepLIKE()
{
bTextMode = IsImageFlag( SbiImageFlags::COMPARETEXT );
}
sal_uInt32 searchFlags = UREGEX_UWORD | UREGEX_DOTALL; // Dot matches newline
if( bTextMode )
{
aSearchOpt.transliterateFlags |= TransliterationFlags::IGNORE_CASE;
searchFlags |= UREGEX_CASE_INSENSITIVE;
}
static sal_uInt32 cachedSearchFlags = 0;
static OUString cachedRegex;
static std::optional<icu::RegexMatcher> oRegexMatcher;
UErrorCode nIcuErr = U_ZERO_ERROR;
if (regex != cachedRegex || searchFlags != cachedSearchFlags || !oRegexMatcher)
{
cachedRegex = regex;
cachedSearchFlags = searchFlags;
icu::UnicodeString sRegex(false, reinterpret_cast<const UChar*>(cachedRegex.getStr()),
cachedRegex.getLength());
oRegexMatcher.emplace(sRegex, cachedSearchFlags, nIcuErr);
}
icu::UnicodeString sSource(false, reinterpret_cast<const UChar*>(value.getStr()),
value.getLength());
oRegexMatcher->reset(sSource);
bool bRes = oRegexMatcher->matches(nIcuErr);
if (nIcuErr)
{
Error(ERRCODE_BASIC_INTERNAL_ERROR);
}
SbxVariable* pRes = new SbxVariable;
utl::TextSearch aSearch( aSearchOpt);
sal_Int32 nStart=0, nEnd=value.getLength();
bool bRes = aSearch.SearchForward(value, &nStart, &nEnd);
pRes->PutBool( bRes );
PushVar( pRes );