office-gobmx/i18npool/qa/cppunit/test_textsearch.cxx
László Németh 3a02490e1a tdf#138258 i18npool: allow ASCII double quote to match typographic quote
Similar to the straight (typewriter or ASCII) apostrophe, straight
double quotation mark (") matches its typographic variants now,
like other word processors do.

Note: regex search doesn't use this matching, similar to the apostrophe
search.

Follow-up to commit d40f2d02df
"tdf#117643 Writer: fix apostrophe search regression".

Change-Id: If6a3ee00750828583cd0cfc4aa7f7b656ea9bd1e
Reviewed-on: https://gerrit.libreoffice.org/c/core/+/169605
Reviewed-by: László Németh <nemeth@numbertext.org>
Tested-by: Jenkins
2024-06-27 10:22:11 +02:00

632 lines
26 KiB
C++
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
* This file is part of the LibreOffice project.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
*
* This file incorporates work covered by the following license notice:
*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright
* ownership. The ASF licenses this file to you under the Apache
* License, Version 2.0 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.apache.org/licenses/LICENSE-2.0 .
*/
#include <memory>

#include <com/sun/star/util/SearchFlags.hpp>
#include <com/sun/star/util/SearchOptions.hpp>
#include <com/sun/star/util/SearchAlgorithms2.hpp>
#include <com/sun/star/util/XTextSearch2.hpp>
#include <unotest/bootstrapfixturebase.hxx>
#include <i18nutil/transliteration.hxx>
#include <unicode/regex.h>
using namespace ::com::sun::star;
// CppUnit fixture exercising the com.sun.star.util.TextSearch and
// com.sun.star.util.TextSearch2 services, preceded by a direct sanity check
// of the ICU regular expression engine.
class TestTextSearch : public test::BootstrapFixtureBase
{
public:
// Instantiates the two search services used by the tests.
virtual void setUp() override;
// Drops the service references again.
virtual void tearDown() override;
// Direct icu::RegexMatcher check, independent of the search services.
void testICU();
// Regular expression search, forward and backward, including sub-matches.
void testSearches();
// Wildcard search ('?', '*', and '~' as escape character) via XTextSearch2.
void testWildcardSearch();
// ASCII apostrophe in the search string versus U+2019 in the text.
void testApostropheSearch();
// ASCII quotation marks in the search string versus typographic ones in the text.
void testQuotationMarkSearch();
// Arabic base letters and combining marks with/without IGNORE_DIACRITICS_CTL.
void testTdf138410();
// Suite definition: every test method above is listed once.
CPPUNIT_TEST_SUITE(TestTextSearch);
CPPUNIT_TEST(testICU);
CPPUNIT_TEST(testSearches);
CPPUNIT_TEST(testWildcardSearch);
CPPUNIT_TEST(testApostropheSearch);
CPPUNIT_TEST(testQuotationMarkSearch);
CPPUNIT_TEST(testTdf138410);
CPPUNIT_TEST_SUITE_END();
private:
// Search service used through the XTextSearch interface (setOptions).
uno::Reference<util::XTextSearch> m_xSearch;
// Search service used through the XTextSearch2 interface (setOptions2).
uno::Reference<util::XTextSearch2> m_xSearch2;
};
// Sanity check our ICU first ...
void TestTextSearch::testICU()
{
UErrorCode nErr = U_ZERO_ERROR;
sal_uInt32 nSearchFlags = UREGEX_UWORD | UREGEX_CASE_INSENSITIVE;
OUString aString( u"abcdefgh"_ustr );
OUString aPattern( u"e"_ustr );
icu::UnicodeString aSearchPat( reinterpret_cast<const UChar*>(aPattern.getStr()), aPattern.getLength() );
std::unique_ptr<icu::RegexMatcher> pRegexMatcher(new icu::RegexMatcher( aSearchPat, nSearchFlags, nErr ));
icu::UnicodeString aSource( reinterpret_cast<const UChar*>(aString.getStr()), aString.getLength() );
pRegexMatcher->reset( aSource );
CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(4), pRegexMatcher->start( nErr ) );
CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(5), pRegexMatcher->end( nErr ) );
CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
OUString aString2( u"acababaabcababadcdaa"_ustr );
OUString aPattern2( u"a"_ustr );
icu::UnicodeString aSearchPat2( reinterpret_cast<const UChar*>(aPattern2.getStr()), aPattern2.getLength() );
pRegexMatcher.reset(new icu::RegexMatcher( aSearchPat2, nSearchFlags, nErr ));
icu::UnicodeString aSource2( reinterpret_cast<const UChar*>(aString2.getStr()), aString2.getLength() );
pRegexMatcher->reset( aSource2 );
CPPUNIT_ASSERT( pRegexMatcher->find( 0, nErr ) );
CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(0), pRegexMatcher->start( nErr ) );
CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
CPPUNIT_ASSERT_EQUAL( static_cast<int32_t>(1), pRegexMatcher->end( nErr ) );
CPPUNIT_ASSERT_EQUAL( U_ZERO_ERROR, nErr );
}
void TestTextSearch::testSearches()
{
OUString str( u"acababaabcababadcdaa"_ustr );
sal_Int32 startPos = 2, endPos = 20 ;
sal_Int32 const fStartRes = 10, fEndRes = 18 ;
sal_Int32 const bStartRes = 18, bEndRes = 10 ;
// set options
util::SearchOptions aOptions;
aOptions.algorithmType = util::SearchAlgorithms_REGEXP ;
aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
aOptions.searchString = "(ab)*a(c|d)+";
m_xSearch->setOptions( aOptions );
util::SearchResult aRes;
// search forward
aRes = m_xSearch->searchForward( str, startPos, endPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( fStartRes, aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( fEndRes, aRes.endOffset[0] );
// search backwards
aRes = m_xSearch->searchBackward( str, endPos, startPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( bStartRes, aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( bEndRes, aRes.endOffset[0] );
aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
| TransliterationFlags::IGNORE_WIDTH);
aOptions.searchString = "([^ ]*)[ ]*([^ ]*)";
m_xSearch->setOptions(aOptions);
aRes = m_xSearch->searchForward(u"11 22 33"_ustr, 2, 7);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.startOffset[1]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[1]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[2]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[2]);
}
// Wildcard search through XTextSearch2, case-insensitive, with '~' as the
// escape character.
//
// Each pattern is searched forward over the whole text and backward from the
// end to the start. In the comments below a forward match is written [s,e)
// and a backward match (s,e], mirroring the asserts: the backward search
// reports the higher offset as startOffset and the lower one as endOffset.
void TestTextSearch::testWildcardSearch()
{
util::SearchOptions2 aOptions;
OUString aText;
util::SearchResult aRes;
aOptions.AlgorithmType2 = util::SearchAlgorithms2::WILDCARD ;
aOptions.WildcardEscapeCharacter = '~';
// aOptions.searchFlag = ::css::util::SearchFlags::WILD_MATCH_SELECTION;
// is not set, so substring match is allowed.
aOptions.transliterateFlags = sal_Int32(::css::i18n::TransliterationModules::TransliterationModules_IGNORE_CASE);
// Text "abAca": a=0, b=1, A=2, c=3, a=4.
aText = "abAca";
aOptions.searchString = "a";
m_xSearch2->setOptions2( aOptions );
// match first "a", [0,1)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
// match last "a", (5,4]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
aOptions.searchString = "a?";
m_xSearch2->setOptions2( aOptions );
// match "ab", [0,2)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
// match "Ac", (4,2]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
aOptions.searchString = "a*c";
m_xSearch2->setOptions2( aOptions );
// match "abAc", [0,4) XXX NOTE: first match forward
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
// match "Ac", (4,2] XXX NOTE: first match backward, not greedy
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(2), aRes.endOffset[0]);
aOptions.searchString = "b*a";
m_xSearch2->setOptions2( aOptions );
// match "bA", [1,3) XXX NOTE: first match forward, not greedy
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
// match "bAca", (5,1] XXX NOTE: first match backward
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
// Escaped '?': "~?" has to match a literal question mark in the text.
aText = "ab?ca";
aOptions.searchString = "?~??";
m_xSearch2->setOptions2( aOptions );
// match "b?c", [1,4)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
// match "b?c", (4,1]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
// Escaped '*': "~*" has to match a literal asterisk in the text.
aText = "ab*ca";
aOptions.searchString = "?~*?";
m_xSearch2->setOptions2( aOptions );
// match "b*c", [1,4)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.endOffset[0]);
// match "b*c", (4,1]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(4), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(1), aRes.endOffset[0]);
// '?' needs a character after "ca", but the text ends there.
aOptions.searchString = "ca?";
m_xSearch2->setOptions2( aOptions );
// no match
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
// no match
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
aOptions.searchString = "ca*";
m_xSearch2->setOptions2( aOptions );
// match "ca", [3,5)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
// match "ca", (5,3]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
aOptions.searchString = "*ca*";
m_xSearch2->setOptions2( aOptions );
// match "ab*ca", [0,5)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.endOffset[0]);
// match "ab*ca", (5,0]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(5), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
aText = "123123";
aOptions.searchString = "*2?";
m_xSearch2->setOptions2( aOptions );
// match first "123", [0,3)
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(3), aRes.endOffset[0]);
// match "123123", (6,0] Yes this looks odd, but it is as searching "?2*" forward.
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
// Same pattern and text, now with WILD_MATCH_SELECTION set (it was left
// unset above so that substring matches were allowed).
aOptions.searchFlag |= util::SearchFlags::WILD_MATCH_SELECTION;
m_xSearch2->setOptions2( aOptions );
// match "123123", [0,6) with greedy '*'
aRes = m_xSearch2->searchForward( aText, 0, aText.getLength());
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.endOffset[0]);
// match "123123", (6,0]
aRes = m_xSearch2->searchBackward( aText, aText.getLength(), 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.subRegExpressions);
CPPUNIT_ASSERT_EQUAL(sal_Int32(6), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(sal_Int32(0), aRes.endOffset[0]);
}
void TestTextSearch::testApostropheSearch()
{
// A) find typographic apostrophes also by using ASCII apostrophe in searchString
OUString str( u"It\u2019s an apostrophe."_ustr );
sal_Int32 startPos = 0, endPos = str.getLength();
// set options
util::SearchOptions aOptions;
aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
aOptions.searchString = "'";
m_xSearch->setOptions( aOptions );
util::SearchResult aRes;
// search forward
aRes = m_xSearch->searchForward( str, startPos, endPos );
// This was 0.
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
// search backwards
aRes = m_xSearch->searchBackward( str, endPos, startPos );
// This was 0.
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
// check with transliteration
aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
| TransliterationFlags::IGNORE_WIDTH);
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward( str, startPos, endPos );
// This was 0.
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
// search backwards
aRes = m_xSearch->searchBackward( str, endPos, startPos );
// This was 0.
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
// B) search ASCII apostrophe in a text with ASCII apostrophes
str = str.replace(u'\u2019', '\'');
// search forward
aRes = m_xSearch->searchForward( str, startPos, endPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
// search backwards
aRes = m_xSearch->searchBackward( str, endPos, startPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
// C) search typographic apostrophe in a text with ASCII apostrophes (no result)
aOptions.searchString = u"\u2019"_ustr;
m_xSearch->setOptions( aOptions );
aRes = m_xSearch->searchForward( str, startPos, endPos );
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
aRes = m_xSearch->searchBackward( str, endPos, startPos );
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
// D) search typographic apostrophe in a text with typographic apostrophes
str = str.replace('\'', u'\u2019');
// search forward
aRes = m_xSearch->searchForward( str, startPos, endPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );
// search backwards
aRes = m_xSearch->searchBackward( str, endPos, startPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(2), aRes.endOffset[0] );
// E) search mixed apostrophes in a text with mixed apostrophes:
aOptions.searchString = u"'\u2019"_ustr;
m_xSearch->setOptions( aOptions );
str = u"test: \u2019'"_ustr;
// search forward
aRes = m_xSearch->searchForward( str, startPos, str.getLength());
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
// search backwards
aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
// F) search mixed apostrophes in a text with ASCII apostrophes:
str = u"test: ''"_ustr;
// search forward
aRes = m_xSearch->searchForward( str, startPos, str.getLength());
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
// search backwards
aRes = m_xSearch->searchBackward( str, str.getLength(), startPos );
CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
}
// Literal (non-regex) search where ASCII quotation marks in the search string
// have to match typographic quotation marks in the text.
//
// The text holds four quoted letters separated by ", ":
//   [0,3)   U+201C x U+201D   searched as "x"
//   [5,8)   U+201E y U+201F   searched as "y"
//   [10,13) U+2018 z U+2019   searched as 'z'
//   [15,18) U+201A a U+201B   searched as 'a'
// Backward searches report the higher offset as startOffset.
void TestTextSearch::testQuotationMarkSearch()
{
    // A) find typographic quotation marks also by using ASCII ones
    //
    // All quotation marks are spelled as \u escapes so they cannot be dropped
    // or confused with look-alike characters. Cases C) and D) need single
    // typographic quotation marks around "z" and "a"; without them the text is
    // only 14 code units long and the offsets asserted below cannot be reached.
    // NOTE(review): the code points of the single quotation marks (U+2018,
    // U+2019, U+201A, U+201B) were chosen to mirror the double ones and to fit
    // the asserted offsets -- confirm against the intended test data.
    OUString str( u"\u201Cx\u201D, \u201Ey\u201F, \u2018z\u2019, \u201Aa\u201B"_ustr );
    sal_Int32 startPos = 0, endPos = str.getLength();

    // set options
    util::SearchOptions aOptions;
    aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
    aOptions.searchFlag = util::SearchFlags::ALL_IGNORE_CASE;
    aOptions.searchString = "\"x\"";
    aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_CASE
                                                   | TransliterationFlags::IGNORE_WIDTH);
    m_xSearch->setOptions( aOptions );

    util::SearchResult aRes;

    // search forward
    aRes = m_xSearch->searchForward( str, startPos, endPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.endOffset[0] );

    // search backwards
    aRes = m_xSearch->searchBackward( str, endPos, startPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(3), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(0), aRes.endOffset[0] );

    // B) ASCII double quotes versus U+201E ... U+201F
    aOptions.searchString = "\"y\"";
    m_xSearch->setOptions( aOptions );

    // search forward
    aRes = m_xSearch->searchForward( str, startPos, endPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.endOffset[0] );

    // search backwards
    aRes = m_xSearch->searchBackward( str, endPos, startPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(8), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(5), aRes.endOffset[0] );

    // C) ASCII single quotes versus U+2018 ... U+2019
    aOptions.searchString = "'z'";
    m_xSearch->setOptions( aOptions );

    // search forward
    aRes = m_xSearch->searchForward( str, startPos, endPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.endOffset[0] );

    // search backwards
    aRes = m_xSearch->searchBackward( str, endPos, startPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(13), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(10), aRes.endOffset[0] );

    // D) ASCII single quotes versus U+201A ... U+201B
    aOptions.searchString = "'a'";
    m_xSearch->setOptions( aOptions );

    // search forward
    aRes = m_xSearch->searchForward( str, startPos, endPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.endOffset[0] );

    // search backwards
    aRes = m_xSearch->searchBackward( str, endPos, startPos );
    // This was 0.
    CPPUNIT_ASSERT( aRes.subRegExpressions > 0 );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(18), aRes.startOffset[0] );
    CPPUNIT_ASSERT_EQUAL( static_cast<sal_Int32>(15), aRes.endOffset[0] );
}
void TestTextSearch::testTdf138410()
{
OUString str(u"\u0643\u064f\u062a\u064f\u0628 \u0643\u062a\u0628"_ustr);
sal_Int32 startPos = 0, endPos = str.getLength();
util::SearchOptions aOptions;
aOptions.algorithmType = util::SearchAlgorithms_ABSOLUTE;
util::SearchResult aRes;
// A) base alone
// The search string will be found whether it is followed by a mark in the
// text or not, and whether IGNORE_DIACRITICS_CTL is set or not.
// set options
aOptions.searchString = u"\u0643"_ustr;
aOptions.transliterateFlags = 0;
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward(str, startPos, endPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
// search backwards
aRes = m_xSearch->searchBackward(str, endPos, startPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
// check with transliteration
aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward(str, startPos, endPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
// search backwards
aRes = m_xSearch->searchBackward(str, endPos, startPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
// b) base+mark
// The search string will be found when followed by a mark in the text, or
// when IGNORE_DIACRITICS_CTL is set whether it is followed by a mark or
// not.
// set options
aOptions.searchString = u"\u0643\u064f"_ustr;
aOptions.transliterateFlags = 0;
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward(str, startPos, endPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
// search backwards
aRes = m_xSearch->searchBackward(str, endPos, startPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.endOffset[0]);
// check with transliteration
aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward(str, startPos, endPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.endOffset[0]);
// search backwards
aRes = m_xSearch->searchBackward(str, endPos, startPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(7), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(6), aRes.endOffset[0]);
// b) mark alone
// The search string will be found only when IGNORE_DIACRITICS_CTL is not
// set.
// set options
aOptions.searchString = u"\u064f"_ustr;
aOptions.transliterateFlags = 0;
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward(str, startPos, endPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(1), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(2), aRes.endOffset[0]);
// search backwards
aRes = m_xSearch->searchBackward(str, endPos, startPos);
CPPUNIT_ASSERT(aRes.subRegExpressions > 0);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(4), aRes.startOffset[0]);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(3), aRes.endOffset[0]);
// with ignore marks the mark will not be found
aOptions.transliterateFlags = static_cast<int>(TransliterationFlags::IGNORE_DIACRITICS_CTL);
m_xSearch->setOptions(aOptions);
// search forward
aRes = m_xSearch->searchForward(str, startPos, endPos);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
// search backwards
aRes = m_xSearch->searchBackward(str, endPos, startPos);
CPPUNIT_ASSERT_EQUAL(static_cast<sal_Int32>(0), aRes.subRegExpressions);
}
void TestTextSearch::setUp()
{
BootstrapFixtureBase::setUp();
m_xSearch.set(m_xSFactory->createInstance(u"com.sun.star.util.TextSearch"_ustr), uno::UNO_QUERY_THROW);
m_xSearch2.set(m_xSFactory->createInstance(u"com.sun.star.util.TextSearch2"_ustr), uno::UNO_QUERY_THROW);
}
// Per-test tear-down: clears both search service references first, then runs
// the base fixture's tearDown().
void TestTextSearch::tearDown()
{
m_xSearch.clear();
m_xSearch2.clear();
BootstrapFixtureBase::tearDown();
}
// Register the TestTextSearch suite with CppUnit.
CPPUNIT_TEST_SUITE_REGISTRATION(TestTextSearch);
// CppUnit plug-in boilerplate for this test library.
CPPUNIT_PLUGIN_IMPLEMENT();
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */