1016 lines
33 KiB
C++
1016 lines
33 KiB
C++
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
|
|
/*************************************************************************
|
|
*
|
|
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
|
|
*
|
|
* Copyright 2000, 2010 Oracle and/or its affiliates.
|
|
*
|
|
* OpenOffice.org - a multi-platform office productivity suite
|
|
*
|
|
* This file is part of OpenOffice.org.
|
|
*
|
|
* OpenOffice.org is free software: you can redistribute it and/or modify
|
|
* it under the terms of the GNU Lesser General Public License version 3
|
|
* only, as published by the Free Software Foundation.
|
|
*
|
|
* OpenOffice.org is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU Lesser General Public License version 3 for more details
|
|
* (a copy is included in the LICENSE file that accompanied this code).
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public License
|
|
* version 3 along with OpenOffice.org. If not, see
|
|
* <http://www.openoffice.org/license.html>
|
|
* for a copy of the LGPLv3 License.
|
|
*
|
|
************************************************************************/
|
|
|
|
// MARKER(update_precomp.py): autogen include statement, do not remove
|
|
#include "precompiled_i18npool.hxx"
|
|
|
|
#include "textsearch.hxx"
|
|
#include "levdis.hxx"
|
|
#include <regexp/reclass.hxx>
|
|
#include <com/sun/star/lang/Locale.hpp>
|
|
#include <com/sun/star/lang/XMultiServiceFactory.hpp>
|
|
#include <comphelper/processfactory.hxx>
|
|
#include <com/sun/star/i18n/UnicodeType.hpp>
|
|
#include <com/sun/star/util/SearchFlags.hpp>
|
|
#include <com/sun/star/i18n/WordType.hpp>
|
|
#include <com/sun/star/i18n/ScriptType.hpp>
|
|
#include <com/sun/star/i18n/CharacterIteratorMode.hpp>
|
|
#include <com/sun/star/i18n/KCharacterType.hpp>
|
|
#include <com/sun/star/registry/XRegistryKey.hpp>
|
|
#include <cppuhelper/factory.hxx>
|
|
#include <cppuhelper/weak.hxx>
|
|
|
|
#ifdef _MSC_VER
|
|
// get rid of that dumb compiler warning
|
|
// identifier was truncated to '255' characters in the debug information
|
|
// for STL template usage, if .pdb files are to be created
|
|
#pragma warning( disable: 4786 )
|
|
#endif
|
|
|
|
#include <string.h>
|
|
|
|
using namespace ::com::sun::star::util;
|
|
using namespace ::com::sun::star::uno;
|
|
using namespace ::com::sun::star::lang;
|
|
using namespace ::com::sun::star::i18n;
|
|
using namespace ::rtl;
|
|
|
|
static sal_Int32 COMPLEX_TRANS_MASK_TMP =
|
|
TransliterationModules_ignoreBaFa_ja_JP |
|
|
TransliterationModules_ignoreIterationMark_ja_JP |
|
|
TransliterationModules_ignoreTiJi_ja_JP |
|
|
TransliterationModules_ignoreHyuByu_ja_JP |
|
|
TransliterationModules_ignoreSeZe_ja_JP |
|
|
TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
|
|
TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
|
|
TransliterationModules_ignoreProlongedSoundMark_ja_JP;
|
|
static const sal_Int32 SIMPLE_TRANS_MASK = 0xffffffff ^ COMPLEX_TRANS_MASK_TMP;
|
|
static const sal_Int32 COMPLEX_TRANS_MASK =
|
|
COMPLEX_TRANS_MASK_TMP |
|
|
TransliterationModules_IGNORE_KANA |
|
|
TransliterationModules_IGNORE_WIDTH;
|
|
// Above 2 transliteration is simple but need to take effect in
|
|
// complex transliteration
|
|
|
|
TextSearch::TextSearch(const Reference < XMultiServiceFactory > & rxMSF)
|
|
: xMSF( rxMSF )
|
|
, pJumpTable( 0 )
|
|
, pJumpTable2( 0 )
|
|
, pRegExp( 0 )
|
|
, pWLD( 0 )
|
|
{
|
|
SearchOptions aOpt;
|
|
aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
|
|
aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
|
|
//aOpt.Locale = ???;
|
|
setOptions( aOpt );
|
|
}
|
|
|
|
TextSearch::~TextSearch()
|
|
{
|
|
delete pRegExp;
|
|
delete pWLD;
|
|
delete pJumpTable;
|
|
delete pJumpTable2;
|
|
}
|
|
|
|
void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException )
|
|
{
|
|
aSrchPara = rOptions;
|
|
|
|
delete pRegExp, pRegExp = 0;
|
|
delete pWLD, pWLD = 0;
|
|
delete pJumpTable, pJumpTable = 0;
|
|
delete pJumpTable2, pJumpTable2 = 0;
|
|
|
|
// Create Transliteration class
|
|
if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
|
|
{
|
|
if( !xTranslit.is() )
|
|
{
|
|
Reference < XInterface > xI = xMSF->createInstance(
|
|
OUString(RTL_CONSTASCII_USTRINGPARAM(
|
|
"com.sun.star.i18n.Transliteration")));
|
|
if ( xI.is() )
|
|
xI->queryInterface( ::getCppuType(
|
|
(const Reference< XExtendedTransliteration >*)0))
|
|
>>= xTranslit;
|
|
}
|
|
// Load transliteration module
|
|
if( xTranslit.is() )
|
|
xTranslit->loadModule(
|
|
(TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
|
|
aSrchPara.Locale);
|
|
}
|
|
else if( xTranslit.is() )
|
|
xTranslit = 0;
|
|
|
|
// Create Transliteration for 2<->1, 2<->2 transliteration
|
|
if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
|
|
{
|
|
if( !xTranslit2.is() )
|
|
{
|
|
Reference < XInterface > xI = xMSF->createInstance(
|
|
OUString(RTL_CONSTASCII_USTRINGPARAM(
|
|
"com.sun.star.i18n.Transliteration")));
|
|
if ( xI.is() )
|
|
xI->queryInterface( ::getCppuType(
|
|
(const Reference< XExtendedTransliteration >*)0))
|
|
>>= xTranslit2;
|
|
}
|
|
// Load transliteration module
|
|
if( xTranslit2.is() )
|
|
xTranslit2->loadModule(
|
|
(TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
|
|
aSrchPara.Locale);
|
|
}
|
|
|
|
if ( !xBreak.is() )
|
|
{
|
|
Reference < XInterface > xI = xMSF->createInstance(
|
|
OUString(RTL_CONSTASCII_USTRINGPARAM("com.sun.star.i18n.BreakIterator")));
|
|
if( xI.is() )
|
|
xI->queryInterface( ::getCppuType(
|
|
(const Reference< XBreakIterator >*)0))
|
|
>>= xBreak;
|
|
}
|
|
|
|
sSrchStr = aSrchPara.searchString;
|
|
|
|
// use transliteration here, but only if not RegEx, which does it different
|
|
if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit.is() &&
|
|
aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
|
|
sSrchStr = xTranslit->transliterateString2String(
|
|
aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
|
|
|
|
if ( aSrchPara.algorithmType != SearchAlgorithms_REGEXP && xTranslit2.is() &&
|
|
aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
|
|
sSrchStr2 = xTranslit2->transliterateString2String(
|
|
aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
|
|
|
|
// When start or end of search string is a complex script type, we need to
|
|
// make sure the result boundary is not located in the middle of cell.
|
|
checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
|
|
ScriptType::COMPLEX));
|
|
checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
|
|
sSrchStr.getLength()-1) == ScriptType::COMPLEX));
|
|
|
|
if ( aSrchPara.algorithmType == SearchAlgorithms_REGEXP )
|
|
{
|
|
fnForward = &TextSearch::RESrchFrwrd;
|
|
fnBackward = &TextSearch::RESrchBkwrd;
|
|
|
|
pRegExp = new Regexpr( aSrchPara, xTranslit );
|
|
}
|
|
else
|
|
{
|
|
if ( aSrchPara.algorithmType == SearchAlgorithms_APPROXIMATE )
|
|
{
|
|
fnForward = &TextSearch::ApproxSrchFrwrd;
|
|
fnBackward = &TextSearch::ApproxSrchBkwrd;
|
|
|
|
pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
|
|
aSrchPara.insertedChars, aSrchPara.deletedChars,
|
|
0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
|
|
|
|
nLimit = pWLD->GetLimit();
|
|
}
|
|
else
|
|
{
|
|
fnForward = &TextSearch::NSrchFrwrd;
|
|
fnBackward = &TextSearch::NSrchBkwrd;
|
|
}
|
|
}
|
|
}
|
|
|
|
sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
|
|
{
|
|
sal_Int32 nRet = 0, nEnd = rOff.getLength();
|
|
while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
|
|
return nRet;
|
|
}
|
|
|
|
sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
|
|
throw( RuntimeException )
|
|
{
|
|
sal_Int32 nDone;
|
|
return nPos == xBreak->previousCharacters(searchStr, nPos+1,
|
|
aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
|
|
}
|
|
|
|
SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
|
|
throw( RuntimeException )
|
|
{
|
|
SearchResult sres;
|
|
|
|
OUString in_str(searchStr);
|
|
sal_Int32 newStartPos = startPos;
|
|
sal_Int32 newEndPos = endPos;
|
|
|
|
bUsePrimarySrchStr = true;
|
|
|
|
if ( xTranslit.is() )
|
|
{
|
|
// apply normal transliteration (1<->1, 1<->0)
|
|
com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
|
|
in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
|
|
|
|
// JP 20.6.2001: also the start and end positions must be corrected!
|
|
if( startPos )
|
|
newStartPos = FindPosInSeq_Impl( offset, startPos );
|
|
|
|
if( endPos < searchStr.getLength() )
|
|
newEndPos = FindPosInSeq_Impl( offset, endPos );
|
|
else
|
|
newEndPos = in_str.getLength();
|
|
|
|
sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
|
|
|
|
for ( int k = 0; k < sres.startOffset.getLength(); k++ )
|
|
{
|
|
if (sres.startOffset[k])
|
|
sres.startOffset[k] = offset[sres.startOffset[k]];
|
|
// JP 20.6.2001: end is ever exclusive and then don't return
|
|
// the position of the next character - return the
|
|
// next position behind the last found character!
|
|
// "a b c" find "b" must return 2,3 and not 2,4!!!
|
|
if (sres.endOffset[k])
|
|
sres.endOffset[k] = offset[sres.endOffset[k]-1] + 1;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sres = (this->*fnForward)( in_str, startPos, endPos );
|
|
}
|
|
|
|
if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
|
|
{
|
|
SearchResult sres2;
|
|
|
|
in_str = OUString(searchStr);
|
|
com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
|
|
|
|
in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
|
|
|
|
if( startPos )
|
|
startPos = FindPosInSeq_Impl( offset, startPos );
|
|
|
|
if( endPos < searchStr.getLength() )
|
|
endPos = FindPosInSeq_Impl( offset, endPos );
|
|
else
|
|
endPos = in_str.getLength();
|
|
|
|
bUsePrimarySrchStr = false;
|
|
sres2 = (this->*fnForward)( in_str, startPos, endPos );
|
|
|
|
for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
|
|
{
|
|
if (sres2.startOffset[k])
|
|
sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
|
|
if (sres2.endOffset[k])
|
|
sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
|
|
}
|
|
|
|
// pick first and long one
|
|
if ( sres.subRegExpressions == 0)
|
|
return sres2;
|
|
if ( sres2.subRegExpressions == 1)
|
|
{
|
|
if ( sres.startOffset[0] > sres2.startOffset[0])
|
|
return sres2;
|
|
else if ( sres.startOffset[0] == sres2.startOffset[0] &&
|
|
sres.endOffset[0] < sres2.endOffset[0])
|
|
return sres2;
|
|
}
|
|
}
|
|
|
|
return sres;
|
|
}
|
|
|
|
SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult sres;
|
|
|
|
OUString in_str(searchStr);
|
|
sal_Int32 newStartPos = startPos;
|
|
sal_Int32 newEndPos = endPos;
|
|
|
|
bUsePrimarySrchStr = true;
|
|
|
|
if ( xTranslit.is() )
|
|
{
|
|
// apply only simple 1<->1 transliteration here
|
|
com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
|
|
in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
|
|
|
|
// JP 20.6.2001: also the start and end positions must be corrected!
|
|
if( startPos < searchStr.getLength() )
|
|
newStartPos = FindPosInSeq_Impl( offset, startPos );
|
|
else
|
|
newStartPos = in_str.getLength();
|
|
|
|
if( endPos )
|
|
newEndPos = FindPosInSeq_Impl( offset, endPos );
|
|
|
|
sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
|
|
|
|
for ( int k = 0; k < sres.startOffset.getLength(); k++ )
|
|
{
|
|
if (sres.startOffset[k])
|
|
sres.startOffset[k] = offset[sres.startOffset[k] - 1] + 1;
|
|
// JP 20.6.2001: end is ever exclusive and then don't return
|
|
// the position of the next character - return the
|
|
// next position behind the last found character!
|
|
// "a b c" find "b" must return 2,3 and not 2,4!!!
|
|
if (sres.endOffset[k])
|
|
sres.endOffset[k] = offset[sres.endOffset[k]];
|
|
}
|
|
}
|
|
else
|
|
{
|
|
sres = (this->*fnBackward)( in_str, startPos, endPos );
|
|
}
|
|
|
|
if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
|
|
{
|
|
SearchResult sres2;
|
|
|
|
in_str = OUString(searchStr);
|
|
com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
|
|
|
|
in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
|
|
|
|
if( startPos < searchStr.getLength() )
|
|
startPos = FindPosInSeq_Impl( offset, startPos );
|
|
else
|
|
startPos = in_str.getLength();
|
|
|
|
if( endPos )
|
|
endPos = FindPosInSeq_Impl( offset, endPos );
|
|
|
|
bUsePrimarySrchStr = false;
|
|
sres2 = (this->*fnBackward)( in_str, startPos, endPos );
|
|
|
|
for( int k = 0; k < sres2.startOffset.getLength(); k++ )
|
|
{
|
|
if (sres2.startOffset[k])
|
|
sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
|
|
if (sres2.endOffset[k])
|
|
sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
|
|
}
|
|
|
|
// pick last and long one
|
|
if ( sres.subRegExpressions == 0 )
|
|
return sres2;
|
|
if ( sres2.subRegExpressions == 1 )
|
|
{
|
|
if ( sres.startOffset[0] < sres2.startOffset[0] )
|
|
return sres2;
|
|
if ( sres.startOffset[0] == sres2.startOffset[0] &&
|
|
sres.endOffset[0] > sres2.endOffset[0] )
|
|
return sres2;
|
|
}
|
|
}
|
|
|
|
return sres;
|
|
}
|
|
|
|
|
|
|
|
//--------------- die Wort-Trennner ----------------------------------
|
|
|
|
bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
|
|
{
|
|
bool bRet = 1;
|
|
if( '\x7f' != rStr[nPos])
|
|
{
|
|
if ( !xCharClass.is() )
|
|
{
|
|
Reference < XInterface > xI = xMSF->createInstance(
|
|
OUString(RTL_CONSTASCII_USTRINGPARAM("com.sun.star.i18n.CharacterClassification")));
|
|
if( xI.is() )
|
|
xI->queryInterface( ::getCppuType(
|
|
(const Reference< XCharacterClassification >*)0))
|
|
>>= xCharClass;
|
|
}
|
|
if ( xCharClass.is() )
|
|
{
|
|
sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
|
|
aSrchPara.Locale );
|
|
if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
|
|
KCharacterType::LETTER ) & nCType ) )
|
|
bRet = 0;
|
|
}
|
|
}
|
|
return bRet;
|
|
}
|
|
|
|
|
|
|
|
// --------- methods for the kind of boyer-morre search ------------------
|
|
|
|
|
|
void TextSearch::MakeForwardTab()
|
|
{
|
|
// create the jumptable for the search text
|
|
if( pJumpTable )
|
|
{
|
|
if( bIsForwardTab )
|
|
return ; // the jumpTable is ok
|
|
delete pJumpTable;
|
|
}
|
|
bIsForwardTab = true;
|
|
|
|
sal_Int32 n, nLen = sSrchStr.getLength();
|
|
pJumpTable = new TextSearchJumpTable;
|
|
|
|
for( n = 0; n < nLen - 1; ++n )
|
|
{
|
|
sal_Unicode cCh = sSrchStr[n];
|
|
sal_Int32 nDiff = nLen - n - 1;
|
|
TextSearchJumpTable::value_type aEntry( cCh, nDiff );
|
|
|
|
::std::pair< TextSearchJumpTable::iterator, bool > aPair =
|
|
pJumpTable->insert( aEntry );
|
|
if ( !aPair.second )
|
|
(*(aPair.first)).second = nDiff;
|
|
}
|
|
}
|
|
|
|
void TextSearch::MakeForwardTab2()
|
|
{
|
|
// create the jumptable for the search text
|
|
if( pJumpTable2 )
|
|
{
|
|
if( bIsForwardTab )
|
|
return ; // the jumpTable is ok
|
|
delete pJumpTable2;
|
|
}
|
|
bIsForwardTab = true;
|
|
|
|
sal_Int32 n, nLen = sSrchStr2.getLength();
|
|
pJumpTable2 = new TextSearchJumpTable;
|
|
|
|
for( n = 0; n < nLen - 1; ++n )
|
|
{
|
|
sal_Unicode cCh = sSrchStr2[n];
|
|
sal_Int32 nDiff = nLen - n - 1;
|
|
|
|
TextSearchJumpTable::value_type aEntry( cCh, nDiff );
|
|
::std::pair< TextSearchJumpTable::iterator, bool > aPair =
|
|
pJumpTable2->insert( aEntry );
|
|
if ( !aPair.second )
|
|
(*(aPair.first)).second = nDiff;
|
|
}
|
|
}
|
|
|
|
void TextSearch::MakeBackwardTab()
|
|
{
|
|
// create the jumptable for the search text
|
|
if( pJumpTable )
|
|
{
|
|
if( !bIsForwardTab )
|
|
return ; // the jumpTable is ok
|
|
delete pJumpTable;
|
|
}
|
|
bIsForwardTab = false;
|
|
|
|
sal_Int32 n, nLen = sSrchStr.getLength();
|
|
pJumpTable = new TextSearchJumpTable;
|
|
|
|
for( n = nLen-1; n > 0; --n )
|
|
{
|
|
sal_Unicode cCh = sSrchStr[n];
|
|
TextSearchJumpTable::value_type aEntry( cCh, n );
|
|
::std::pair< TextSearchJumpTable::iterator, bool > aPair =
|
|
pJumpTable->insert( aEntry );
|
|
if ( !aPair.second )
|
|
(*(aPair.first)).second = n;
|
|
}
|
|
}
|
|
|
|
void TextSearch::MakeBackwardTab2()
|
|
{
|
|
// create the jumptable for the search text
|
|
if( pJumpTable2 )
|
|
{
|
|
if( !bIsForwardTab )
|
|
return ; // the jumpTable is ok
|
|
delete pJumpTable2;
|
|
}
|
|
bIsForwardTab = false;
|
|
|
|
sal_Int32 n, nLen = sSrchStr2.getLength();
|
|
pJumpTable2 = new TextSearchJumpTable;
|
|
|
|
for( n = nLen-1; n > 0; --n )
|
|
{
|
|
sal_Unicode cCh = sSrchStr2[n];
|
|
TextSearchJumpTable::value_type aEntry( cCh, n );
|
|
::std::pair< TextSearchJumpTable::iterator, bool > aPair =
|
|
pJumpTable2->insert( aEntry );
|
|
if ( !aPair.second )
|
|
(*(aPair.first)).second = n;
|
|
}
|
|
}
|
|
|
|
sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
|
|
{
|
|
TextSearchJumpTable *pJump;
|
|
OUString sSearchKey;
|
|
|
|
if ( bUsePrimarySrchStr ) {
|
|
pJump = pJumpTable;
|
|
sSearchKey = sSrchStr;
|
|
} else {
|
|
pJump = pJumpTable2;
|
|
sSearchKey = sSrchStr2;
|
|
}
|
|
|
|
TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
|
|
if ( iLook == pJump->end() )
|
|
return sSearchKey.getLength();
|
|
return (*iLook).second;
|
|
}
|
|
|
|
|
|
// TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
|
|
SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult aRet;
|
|
aRet.subRegExpressions = 0;
|
|
|
|
OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
|
|
|
|
OUString aStr( searchStr );
|
|
sal_Int32 nSuchIdx = aStr.getLength();
|
|
sal_Int32 nEnde = endPos;
|
|
if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
|
|
return aRet;
|
|
|
|
|
|
if( nEnde < sSearchKey.getLength() ) // position inside the search region ?
|
|
return aRet;
|
|
|
|
nEnde -= sSearchKey.getLength();
|
|
|
|
if (bUsePrimarySrchStr)
|
|
MakeForwardTab(); // create the jumptable
|
|
else
|
|
MakeForwardTab2();
|
|
|
|
for (sal_Int32 nCmpIdx = startPos; // start position for the search
|
|
nCmpIdx <= nEnde;
|
|
nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
|
|
{
|
|
// if the match would be the completed cells, skip it.
|
|
if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
|
|
&& !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
|
|
continue;
|
|
|
|
nSuchIdx = sSearchKey.getLength() - 1;
|
|
while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
|
|
{
|
|
if( nSuchIdx == 0 )
|
|
{
|
|
if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
|
|
{
|
|
sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
|
|
bool bAtStart = !nCmpIdx;
|
|
bool bAtEnd = nFndEnd == endPos;
|
|
bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
|
|
bool bDelimBehind = IsDelimiter( aStr, nFndEnd );
|
|
// * 1 -> only one word in the paragraph
|
|
// * 2 -> at begin of paragraph
|
|
// * 3 -> at end of paragraph
|
|
// * 4 -> inside the paragraph
|
|
if( !( ( bAtStart && bAtEnd ) || // 1
|
|
( bAtStart && bDelimBehind ) || // 2
|
|
( bAtEnd && bDelimBefore ) || // 3
|
|
( bDelimBefore && bDelimBehind ))) // 4
|
|
break;
|
|
}
|
|
|
|
aRet.subRegExpressions = 1;
|
|
aRet.startOffset.realloc( 1 );
|
|
aRet.startOffset[ 0 ] = nCmpIdx;
|
|
aRet.endOffset.realloc( 1 );
|
|
aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
|
|
|
|
return aRet;
|
|
}
|
|
else
|
|
nSuchIdx--;
|
|
}
|
|
}
|
|
return aRet;
|
|
}
|
|
|
|
SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult aRet;
|
|
aRet.subRegExpressions = 0;
|
|
|
|
OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
|
|
|
|
OUString aStr( searchStr );
|
|
sal_Int32 nSuchIdx = aStr.getLength();
|
|
sal_Int32 nEnde = endPos;
|
|
if( nSuchIdx == 0 || sSearchKey.getLength() == 0 || sSearchKey.getLength() > nSuchIdx)
|
|
return aRet;
|
|
|
|
if (bUsePrimarySrchStr)
|
|
MakeBackwardTab(); // create the jumptable
|
|
else
|
|
MakeBackwardTab2();
|
|
|
|
if( nEnde == nSuchIdx ) // end position for the search
|
|
nEnde = sSearchKey.getLength();
|
|
else
|
|
nEnde += sSearchKey.getLength();
|
|
|
|
sal_Int32 nCmpIdx = startPos; // start position for the search
|
|
|
|
while (nCmpIdx >= nEnde)
|
|
{
|
|
// if the match would be the completed cells, skip it.
|
|
if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
|
|
sSearchKey.getLength() )) && (!checkCTLEnd ||
|
|
isCellStart( aStr, nCmpIdx)))
|
|
{
|
|
nSuchIdx = 0;
|
|
while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
|
|
aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
|
|
nSuchIdx++;
|
|
if( nSuchIdx >= sSearchKey.getLength() )
|
|
{
|
|
if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
|
|
{
|
|
sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
|
|
bool bAtStart = !nFndStt;
|
|
bool bAtEnd = nCmpIdx == startPos;
|
|
bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
|
|
bool bDelimBefore = bAtStart || // begin of paragraph
|
|
IsDelimiter( aStr, nFndStt-1 );
|
|
// * 1 -> only one word in the paragraph
|
|
// * 2 -> at begin of paragraph
|
|
// * 3 -> at end of paragraph
|
|
// * 4 -> inside the paragraph
|
|
if( ( bAtStart && bAtEnd ) || // 1
|
|
( bAtStart && bDelimBehind ) || // 2
|
|
( bAtEnd && bDelimBefore ) || // 3
|
|
( bDelimBefore && bDelimBehind )) // 4
|
|
{
|
|
aRet.subRegExpressions = 1;
|
|
aRet.startOffset.realloc( 1 );
|
|
aRet.startOffset[ 0 ] = nCmpIdx;
|
|
aRet.endOffset.realloc( 1 );
|
|
aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
|
|
return aRet;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
aRet.subRegExpressions = 1;
|
|
aRet.startOffset.realloc( 1 );
|
|
aRet.startOffset[ 0 ] = nCmpIdx;
|
|
aRet.endOffset.realloc( 1 );
|
|
aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
|
|
return aRet;
|
|
}
|
|
}
|
|
}
|
|
nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
|
|
if( nCmpIdx < nSuchIdx )
|
|
return aRet;
|
|
nCmpIdx -= nSuchIdx;
|
|
}
|
|
return aRet;
|
|
}
|
|
|
|
|
|
|
|
//---------------------------------------------------------------------------
|
|
// ------- Methoden fuer die Suche ueber Regular-Expressions --------------
|
|
|
|
SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
|
|
sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult aRet;
|
|
aRet.subRegExpressions = 0;
|
|
OUString aStr( searchStr );
|
|
|
|
bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
|
|
SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
|
|
|
|
pRegExp->set_line(aStr.getStr(), bSearchInSel ? endPos : aStr.getLength());
|
|
|
|
struct re_registers regs;
|
|
|
|
// Clear structure
|
|
memset((void *)®s, 0, sizeof(struct re_registers));
|
|
if ( ! pRegExp->re_search(®s, startPos) )
|
|
{
|
|
if( regs.num_of_match > 0 &&
|
|
(regs.start[0] != -1 && regs.end[0] != -1) )
|
|
{
|
|
aRet.startOffset.realloc(regs.num_of_match);
|
|
aRet.endOffset.realloc(regs.num_of_match);
|
|
|
|
sal_Int32 i = 0, j = 0;
|
|
while( j < regs.num_of_match )
|
|
{
|
|
if( regs.start[j] != -1 && regs.end[j] != -1 )
|
|
{
|
|
aRet.startOffset[i] = regs.start[j];
|
|
aRet.endOffset[i] = regs.end[j];
|
|
++i;
|
|
}
|
|
++j;
|
|
}
|
|
aRet.subRegExpressions = i;
|
|
}
|
|
if ( regs.num_regs > 0 )
|
|
{
|
|
if ( regs.start )
|
|
free(regs.start);
|
|
if ( regs.end )
|
|
free(regs.end);
|
|
}
|
|
}
|
|
|
|
return aRet;
|
|
}
|
|
|
|
/*
|
|
* Sucht das Muster aSrchPara.sSrchStr rueckwaerts im String rStr
|
|
*/
|
|
SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
|
|
sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult aRet;
|
|
aRet.subRegExpressions = 0;
|
|
OUString aStr( searchStr );
|
|
|
|
sal_Int32 nOffset = 0;
|
|
sal_Int32 nStrEnde = aStr.getLength() == endPos ? 0 : endPos;
|
|
|
|
bool bSearchInSel = (0 != (( SearchFlags::REG_NOT_BEGINOFLINE |
|
|
SearchFlags::REG_NOT_ENDOFLINE ) & aSrchPara.searchFlag ));
|
|
|
|
if( startPos )
|
|
nOffset = startPos - 1;
|
|
|
|
// search only in the subString
|
|
if( bSearchInSel && nStrEnde )
|
|
{
|
|
aStr = aStr.copy( nStrEnde, aStr.getLength() - nStrEnde );
|
|
if( nOffset > nStrEnde )
|
|
nOffset = nOffset - nStrEnde;
|
|
else
|
|
nOffset = 0;
|
|
}
|
|
|
|
// set the length to negative for reverse search
|
|
pRegExp->set_line( aStr.getStr(), -(aStr.getLength()) );
|
|
struct re_registers regs;
|
|
|
|
// Clear structure
|
|
memset((void *)®s, 0, sizeof(struct re_registers));
|
|
if ( ! pRegExp->re_search(®s, nOffset) )
|
|
{
|
|
if( regs.num_of_match > 0 &&
|
|
(regs.start[0] != -1 && regs.end[0] != -1) )
|
|
{
|
|
nOffset = bSearchInSel ? nStrEnde : 0;
|
|
aRet.startOffset.realloc(regs.num_of_match);
|
|
aRet.endOffset.realloc(regs.num_of_match);
|
|
|
|
sal_Int32 i = 0, j = 0;
|
|
while( j < regs.num_of_match )
|
|
{
|
|
if( regs.start[j] != -1 && regs.end[j] != -1 )
|
|
{
|
|
aRet.startOffset[i] = regs.end[j] + nOffset;
|
|
aRet.endOffset[i] = regs.start[j] + nOffset;
|
|
++i;
|
|
}
|
|
++j;
|
|
}
|
|
aRet.subRegExpressions = i;
|
|
}
|
|
if ( regs.num_regs > 0 )
|
|
{
|
|
if ( regs.start )
|
|
free(regs.start);
|
|
if ( regs.end )
|
|
free(regs.end);
|
|
}
|
|
}
|
|
|
|
return aRet;
|
|
}
|
|
|
|
// Phonetische Suche von Worten
|
|
SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
|
|
sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult aRet;
|
|
aRet.subRegExpressions = 0;
|
|
|
|
if( !xBreak.is() )
|
|
return aRet;
|
|
|
|
OUString aWTemp( searchStr );
|
|
|
|
register sal_Int32 nStt, nEnd;
|
|
|
|
Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
|
|
aSrchPara.Locale,
|
|
WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
|
|
|
|
do
|
|
{
|
|
if( aWBnd.startPos >= endPos )
|
|
break;
|
|
nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
|
|
nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
|
|
|
|
if( nStt < nEnd &&
|
|
pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
|
|
{
|
|
aRet.subRegExpressions = 1;
|
|
aRet.startOffset.realloc( 1 );
|
|
aRet.startOffset[ 0 ] = nStt;
|
|
aRet.endOffset.realloc( 1 );
|
|
aRet.endOffset[ 0 ] = nEnd;
|
|
break;
|
|
}
|
|
|
|
nStt = nEnd - 1;
|
|
aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
|
|
WordType::ANYWORD_IGNOREWHITESPACES);
|
|
} while( aWBnd.startPos != aWBnd.endPos ||
|
|
(aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
|
|
// #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
|
|
// whitespace) in searchStr, getWordBoundary() returned startPos,startPos
|
|
// and nextWord() does also => don't loop forever.
|
|
return aRet;
|
|
}
|
|
|
|
SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
|
|
sal_Int32 startPos, sal_Int32 endPos )
|
|
throw(RuntimeException)
|
|
{
|
|
SearchResult aRet;
|
|
aRet.subRegExpressions = 0;
|
|
|
|
if( !xBreak.is() )
|
|
return aRet;
|
|
|
|
OUString aWTemp( searchStr );
|
|
|
|
register sal_Int32 nStt, nEnd;
|
|
|
|
Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
|
|
aSrchPara.Locale,
|
|
WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
|
|
|
|
do
|
|
{
|
|
if( aWBnd.endPos <= endPos )
|
|
break;
|
|
nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
|
|
nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
|
|
|
|
if( nStt < nEnd &&
|
|
pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
|
|
{
|
|
aRet.subRegExpressions = 1;
|
|
aRet.startOffset.realloc( 1 );
|
|
aRet.startOffset[ 0 ] = nEnd;
|
|
aRet.endOffset.realloc( 1 );
|
|
aRet.endOffset[ 0 ] = nStt;
|
|
break;
|
|
}
|
|
if( !nStt )
|
|
break;
|
|
|
|
aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
|
|
WordType::ANYWORD_IGNOREWHITESPACES);
|
|
} while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
|
|
return aRet;
|
|
}
|
|
|
|
|
|
static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch";
|
|
static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
|
|
|
|
static OUString getServiceName_Static()
|
|
{
|
|
return OUString::createFromAscii( cSearchName );
|
|
}
|
|
|
|
static OUString getImplementationName_Static()
|
|
{
|
|
return OUString::createFromAscii( cSearchImpl );
|
|
}
|
|
|
|
OUString SAL_CALL
|
|
TextSearch::getImplementationName()
|
|
throw( RuntimeException )
|
|
{
|
|
return getImplementationName_Static();
|
|
}
|
|
|
|
sal_Bool SAL_CALL
|
|
TextSearch::supportsService(const OUString& rServiceName)
|
|
throw( RuntimeException )
|
|
{
|
|
return !rServiceName.compareToAscii( cSearchName );
|
|
}
|
|
|
|
Sequence< OUString > SAL_CALL
|
|
TextSearch::getSupportedServiceNames(void) throw( RuntimeException )
|
|
{
|
|
Sequence< OUString > aRet(1);
|
|
aRet[0] = getServiceName_Static();
|
|
return aRet;
|
|
}
|
|
|
|
::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
|
|
SAL_CALL TextSearch_CreateInstance(
|
|
const ::com::sun::star::uno::Reference<
|
|
::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
|
|
{
|
|
return ::com::sun::star::uno::Reference<
|
|
::com::sun::star::uno::XInterface >(
|
|
(::cppu::OWeakObject*) new TextSearch( rxMSF ) );
|
|
}
|
|
|
|
extern "C"
|
|
{
|
|
|
|
SAL_DLLPUBLIC_EXPORT void* SAL_CALL component_getFactory( const sal_Char* sImplementationName,
|
|
void* _pServiceManager, void* /*_pRegistryKey*/ )
|
|
{
|
|
void* pRet = NULL;
|
|
|
|
::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
|
|
reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
|
|
( _pServiceManager );
|
|
::com::sun::star::uno::Reference<
|
|
::com::sun::star::lang::XSingleServiceFactory > xFactory;
|
|
|
|
if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
|
|
{
|
|
::com::sun::star::uno::Sequence< ::rtl::OUString > aServiceNames(1);
|
|
aServiceNames[0] = getServiceName_Static();
|
|
xFactory = ::cppu::createSingleFactory(
|
|
pServiceManager, getImplementationName_Static(),
|
|
&TextSearch_CreateInstance, aServiceNames );
|
|
}
|
|
|
|
if ( xFactory.is() )
|
|
{
|
|
xFactory->acquire();
|
|
pRet = xFactory.get();
|
|
}
|
|
|
|
return pRet;
|
|
}
|
|
|
|
} // extern "C"
|
|
|
|
/* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|