LCOV - commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae - i18npool/source/search/textsearch.cxx

LCOV - code coverage report

Current view:	top level - i18npool/source/search - textsearch.cxx (source / functions)		Hit	Total	Coverage
Test:	commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae	Lines:	250	551	45.4 %
Date:	2014-11-03	Functions:	18	29	62.1 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "textsearch.hxx"
      21             : #include "levdis.hxx"
      22             : #include <com/sun/star/lang/Locale.hpp>
      23             : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
      24             : #include <comphelper/processfactory.hxx>
      25             : #include <com/sun/star/i18n/BreakIterator.hpp>
      26             : #include <com/sun/star/i18n/UnicodeType.hpp>
      27             : #include <com/sun/star/util/SearchFlags.hpp>
      28             : #include <com/sun/star/i18n/WordType.hpp>
      29             : #include <com/sun/star/i18n/ScriptType.hpp>
      30             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      31             : #include <com/sun/star/i18n/CharacterClassification.hpp>
      32             : #include <com/sun/star/i18n/KCharacterType.hpp>
      33             : #include <com/sun/star/i18n/Transliteration.hpp>
      34             : #include <com/sun/star/registry/XRegistryKey.hpp>
      35             : #include <cppuhelper/factory.hxx>
      36             : #include <cppuhelper/supportsservice.hxx>
      37             : #include <cppuhelper/weak.hxx>
      38             : 
      39             : #ifdef _MSC_VER
      40             : // get rid of that dumb compiler warning
      41             : // identifier was truncated to '255' characters in the debug information
      42             : // for STL template usage, if .pdb files are to be created
      43             : #pragma warning( disable: 4786 )
      44             : #endif
      45             : 
      46             : #include <string.h>
      47             : 
      48             : using namespace ::com::sun::star::util;
      49             : using namespace ::com::sun::star::uno;
      50             : using namespace ::com::sun::star::lang;
      51             : using namespace ::com::sun::star::i18n;
      52             : using namespace ::com::sun::star;
      53             : 
      54             : static const sal_Int32 COMPLEX_TRANS_MASK_TMP = // the eight *_ja_JP "ignore" modules; base of COMPLEX_TRANS_MASK
      55             :     TransliterationModules_ignoreBaFa_ja_JP |
      56             :     TransliterationModules_ignoreIterationMark_ja_JP |
      57             :     TransliterationModules_ignoreTiJi_ja_JP |
      58             :     TransliterationModules_ignoreHyuByu_ja_JP |
      59             :     TransliterationModules_ignoreSeZe_ja_JP |
      60             :     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
      61             :     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
      62             :     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
      63             : 
      64             : // IGNORE_KANA and FULLWIDTH_HALFWIDTH are simple transliterations, but they
      65             : // must take effect as part of the complex transliteration, so they are added here.
      66             : static const sal_Int32 COMPLEX_TRANS_MASK = // modules handled by xTranslit2 in setOptions()
      67             :     COMPLEX_TRANS_MASK_TMP |
      68             :     TransliterationModules_IGNORE_KANA |
      69             :     TransliterationModules_FULLWIDTH_HALFWIDTH;
      70             : // Every bit that is not part of COMPLEX_TRANS_MASK counts as a simple module (handled by xTranslit).
      71             : static const sal_Int32 SIMPLE_TRANS_MASK = ~COMPLEX_TRANS_MASK;
      72             : 
      73             : // Regex patterns are case sensitive, so the case-related modules are also removed from the mask applied to a regex pattern.
      74             : static const sal_Int32 SIMPLE_TRANS_MASK_REPATTERN =
      75             :     ~(COMPLEX_TRANS_MASK |
      76             :             TransliterationModules_IGNORE_CASE |
      77             :             TransliterationModules_UPPERCASE_LOWERCASE |
      78             :             TransliterationModules_LOWERCASE_UPPERCASE);
      79             : 
      80             : 
      81         112 : TextSearch::TextSearch(const Reference < XComponentContext > & rxContext) // keeps the component context and applies default options
      82             :         : m_xContext( rxContext )
      83             :         , pJumpTable( 0 )
      84             :         , pJumpTable2( 0 )
      85             :         , pRegexMatcher( NULL )
      86         112 :         , pWLD( 0 )
      87             : { // All owned raw pointers start out null; setOptions() below fills in the remaining state.
      88         112 :     SearchOptions aOpt;
      89         112 :     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
      90         112 :     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
      91             :     // The locale is not set here (original note: aOpt.Locale = ???); aOpt.Locale keeps its default-constructed value.
      92         112 :     setOptions( aOpt );
      93         112 : }
      94             : 
      95         288 : TextSearch::~TextSearch() // releases the four heap objects owned through raw pointers
      96             : { // Each pointer is either null or was allocated with new (see setOptions() and the Make*Tab helpers), so plain delete is used.
      97          96 :     delete pRegexMatcher;
      98          96 :     delete pWLD;
      99          96 :     delete pJumpTable;
     100          96 :     delete pJumpTable2;
     101         192 : }
     102             : 
     103         220 : void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException, std::exception )
     104             : { // Stores rOptions and rebuilds everything derived from them: transliterators, search strings, CTL flags and the search callbacks.
     105         220 :     aSrchPara = rOptions;
     106             :     // Discard the helpers that were built for the previous options.
     107         220 :     delete pRegexMatcher, pRegexMatcher = NULL;
     108         220 :     delete pWLD, pWLD = 0;
     109         220 :     delete pJumpTable, pJumpTable = 0;
     110         220 :     delete pJumpTable2, pJumpTable2 = 0;
     111             : 
     112             :     // Create (or reuse) the simple transliterator if any non-complex module is requested; otherwise release it.
     113         220 :     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
     114             :     {
     115          88 :         if( !xTranslit.is() )
     116          88 :             xTranslit.set( Transliteration::create( m_xContext ) );
     117          88 :         xTranslit->loadModule(
     118          88 :              (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
     119         176 :              aSrchPara.Locale);
     120             :     }
     121         132 :     else if( xTranslit.is() )
     122           0 :         xTranslit = 0;
     123             : 
     124             :     // Create Transliteration for 2<->1, 2<->2 transliteration
     125         220 :     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
     126             :     {
     127           0 :         if( !xTranslit2.is() )
     128           0 :             xTranslit2.set( Transliteration::create( m_xContext ) );
     129             :         // Load transliteration module
     130           0 :         xTranslit2->loadModule(
     131           0 :              (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
     132           0 :              aSrchPara.Locale);
     133             :     }
     134             :     // NOTE(review): unlike xTranslit, xTranslit2 is not released when no complex flag is set, so an instance (and sSrchStr2) left from earlier options stays in use by searchForward()/searchBackward() - confirm this is intended.
     135         220 :     if ( !xBreak.is() )
     136         112 :         xBreak = com::sun::star::i18n::BreakIterator::create( m_xContext );
     137             : 
     138         220 :     sSrchStr = aSrchPara.searchString; // used as is unless one of the branches below transliterates it
     139             : 
     140             :     // Transliterate search string. Regex patterns only get the modules in SIMPLE_TRANS_MASK_REPATTERN (no case modules).
     141         220 :     if (aSrchPara.algorithmType == SearchAlgorithms_REGEXP)
     142             :     {
     143          34 :         if (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK_REPATTERN)
     144             :         {
     145           0 :             if ((aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK_REPATTERN) !=
     146             :                     (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK))
     147             :             { // The masked flags differ, i.e. a case module was requested: use a temporary transliterator loaded without it.
     148             :                 com::sun::star::uno::Reference< XExtendedTransliteration > xTranslitPattern(
     149           0 :                          Transliteration::create( m_xContext ));
     150           0 :                 if (xTranslitPattern.is())
     151             :                 {
     152           0 :                     xTranslitPattern->loadModule(
     153           0 :                             (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK_REPATTERN ),
     154           0 :                             aSrchPara.Locale);
     155           0 :                     sSrchStr = xTranslitPattern->transliterateString2String(
     156           0 :                             aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     157           0 :                 }
     158             :             }
     159             :             else
     160             :             { // No case module requested: xTranslit was loaded above with exactly the modules the pattern needs.
     161           0 :                 if (xTranslit.is())
     162           0 :                     sSrchStr = xTranslit->transliterateString2String(
     163           0 :                             aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     164             :             }
     165             :             // xTranslit2 complex transliterated sSrchStr2 is not used in
     166             :             // regex, see TextSearch::searchForward() and
     167             :             // TextSearch::searchBackward()
     168             :         }
     169             :     }
     170             :     else
     171             :     { // Non-regex: primary string (sSrchStr) via xTranslit, secondary string (sSrchStr2) via xTranslit2.
     172         186 :         if ( xTranslit.is() && aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
     173         222 :             sSrchStr = xTranslit->transliterateString2String(
     174         148 :                     aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     175             : 
     176         186 :         if ( xTranslit2.is() && aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
     177           0 :             sSrchStr2 = xTranslit2->transliterateString2String(
     178           0 :                     aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     179             :     }
     180             : 
     181             :     // When start or end of search string is a complex script type, we need to
     182             :     // make sure the result boundary is not located in the middle of cell.
     183         220 :     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
     184         220 :                 ScriptType::COMPLEX));
     185         440 :     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
     186         440 :                     sSrchStr.getLength()-1) == ScriptType::COMPLEX)); // NOTE(review): the index is -1 when sSrchStr is empty (e.g. the constructor's default options) - confirm getScriptType() tolerates that
     187             :     // Select the forward/backward member functions for the requested algorithm.
     188         220 :     switch( aSrchPara.algorithmType)
     189             :     {
     190             :         case SearchAlgorithms_REGEXP:
     191          34 :             fnForward = &TextSearch::RESrchFrwrd;
     192          34 :             fnBackward = &TextSearch::RESrchBkwrd;
     193          34 :             RESrchPrepare( aSrchPara); // defined elsewhere; presumably builds pRegexMatcher from the pattern - TODO confirm
     194          34 :             break;
     195             : 
     196             :         case SearchAlgorithms_APPROXIMATE:
     197           0 :             fnForward = &TextSearch::ApproxSrchFrwrd;
     198           0 :             fnBackward = &TextSearch::ApproxSrchBkwrd;
     199             :             // WLevDistance helper built from the search string and the changed/inserted/deleted character options; LEV_RELAXED is passed as a bool.
     200           0 :             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
     201             :                     aSrchPara.insertedChars, aSrchPara.deletedChars,
     202           0 :                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
     203             : 
     204           0 :             nLimit = pWLD->GetLimit(); // nLimit is only assigned for the approximate algorithm
     205           0 :             break;
     206             : 
     207             :         default: // every other algorithm value falls back to the normal (N*) search routines
     208         186 :             fnForward = &TextSearch::NSrchFrwrd;
     209         186 :             fnBackward = &TextSearch::NSrchBkwrd;
     210         186 :             break;
     211             :     }
     212         220 : }
     213             : 
     214         158 : sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos ) // index of the first entry of rOff that is >= nPos
     215             : { // Linear scan from the front; returns rOff.getLength() when every entry is smaller than nPos (0 for an empty sequence).
     216         158 :     sal_Int32 nRet = 0, nEnd = rOff.getLength();
     217         158 :     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
     218         158 :     return nRet;
     219             : }
     220             : 
     221           0 : bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos) // true if nPos is where the break iterator places a cell boundary
     222             :         throw( RuntimeException )
     223             : { // Steps back one cell (SKIPCELL, count 1) from nPos+1 and checks that the result is exactly nPos.
     224             :     sal_Int32 nDone; // out-parameter of previousCharacters(); its value is not used here
     225           0 :     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
     226           0 :             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
     227             : }
     228             : 
     229        2374 : SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     230             :         throw( RuntimeException, std::exception )
     231             : { // Runs the configured forward search (fnForward) on searchStr between startPos and endPos, on transliterated text where a transliterator is set, and returns offsets relative to searchStr.
     232        2374 :     SearchResult sres;
     233             : 
     234        4748 :     OUString in_str(searchStr);
     235        2374 :     sal_Int32 newStartPos = startPos;
     236        2374 :     sal_Int32 newEndPos = endPos;
     237             :     // First pass: the search helpers use the primary search string (sSrchStr).
     238        2374 :     bUsePrimarySrchStr = true;
     239             : 
     240        2374 :     if ( xTranslit.is() )
     241             :     {
     242             :         // apply normal transliteration (1<->1, 1<->0)
     243         872 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     244         872 :         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
     245             : 
     246             :         // JP 20.6.2001: also the start and end positions must be corrected!
     247         872 :         if( startPos )
     248         158 :             newStartPos = FindPosInSeq_Impl( offset, startPos );
     249             : 
     250         872 :         if( endPos < searchStr.getLength() )
     251           0 :             newEndPos = FindPosInSeq_Impl( offset, endPos );
     252             :         else
     253         872 :             newEndPos = in_str.getLength();
     254             :         // Search the transliterated text with the corrected positions.
     255         872 :         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
     256             : 
     257             :         // Map offsets back to untransliterated string.
     258         872 :         const sal_Int32 nOffsets = offset.getLength();
     259         872 :         if (nOffsets)
     260             :         {
     261             :             // For regex nGroups is the number of groups+1 with group 0 being
     262             :             // the entire match.
     263         872 :             const sal_Int32 nGroups = sres.startOffset.getLength();
     264        1168 :             for ( sal_Int32 k = 0; k < nGroups; k++ )
     265             :             { // A start of 0 is kept; otherwise it maps to offset[nStart], or to one behind the last entry when nStart is past the sequence.
     266         296 :                 const sal_Int32 nStart = sres.startOffset[k];
     267         296 :                 if (nStart > 0)
     268         160 :                     sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
     269             :                 // JP 20.6.2001: end is ever exclusive and then don't return
     270             :                 //               the position of the next character - return the
     271             :                 //               next position behind the last found character!
     272             :                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
     273         296 :                 const sal_Int32 nStop = sres.endOffset[k];
     274         296 :                 if (nStop > 0)
     275         296 :                     sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
     276             :             }
     277         872 :         }
     278             :     }
     279             :     else
     280             :     {
     281        1502 :         sres = (this->*fnForward)( in_str, startPos, endPos );
     282             :     }
     283             : 
     284        2374 :     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
     285             :     { // Second pass with the complex transliteration (xTranslit2); skipped for regex searches.
     286           0 :         SearchResult sres2;
     287             : 
     288           0 :         in_str = OUString(searchStr);
     289           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     290             : 
     291           0 :         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
     292             :         // The by-value parameters startPos/endPos are overwritten with the corrected positions from here on.
     293           0 :         if( startPos )
     294           0 :             startPos = FindPosInSeq_Impl( offset, startPos );
     295             : 
     296           0 :         if( endPos < searchStr.getLength() )
     297           0 :             endPos = FindPosInSeq_Impl( offset, endPos );
     298             :         else
     299           0 :             endPos = in_str.getLength();
     300             : 
     301           0 :     bUsePrimarySrchStr = false; // the search helpers now use sSrchStr2 / pJumpTable2
     302           0 :         sres2 = (this->*fnForward)( in_str, startPos, endPos );
     303             :         // NOTE(review): unlike the first pass, the lookups below are not clamped to offset.getLength() - confirm the result offsets can never exceed it.
     304           0 :         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
     305             :         {
     306           0 :             if (sres2.startOffset[k])
     307           0 :           sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
     308           0 :             if (sres2.endOffset[k])
     309           0 :           sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
     310             :         }
     311             : 
     312             :     // Pick the first and longest one: sres2 wins if the primary pass has subRegExpressions == 0, if sres2 starts earlier, or if it starts at the same position and ends later.
     313           0 :     if ( sres.subRegExpressions == 0)
     314           0 :         return sres2;
     315           0 :     if ( sres2.subRegExpressions == 1)
     316             :     {
     317           0 :         if ( sres.startOffset[0] > sres2.startOffset[0])
     318           0 :             return sres2;
     319           0 :         else if ( sres.startOffset[0] == sres2.startOffset[0] &&
     320           0 :             sres.endOffset[0] < sres2.endOffset[0])
     321           0 :             return sres2;
     322           0 :     }
     323             :     }
     324             : 
     325        4748 :     return sres;
     326             : }
     327             : 
     328           2 : SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     329             :         throw(RuntimeException, std::exception)
     330             : { // Backward counterpart of searchForward(): runs fnBackward, on transliterated text where a transliterator is set, and returns offsets relative to searchStr.
     331           2 :     SearchResult sres;
     332             : 
     333           4 :     OUString in_str(searchStr);
     334           2 :     sal_Int32 newStartPos = startPos;
     335           2 :     sal_Int32 newEndPos = endPos;
     336             :     // First pass: the search helpers use the primary search string (sSrchStr).
     337           2 :     bUsePrimarySrchStr = true;
     338             : 
     339           2 :     if ( xTranslit.is() )
     340             :     {
     341             :         // apply only simple 1<->1 transliteration here
     342           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     343           0 :     in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
     344             : 
     345             :         // JP 20.6.2001: also the start and end positions must be corrected!
     346           0 :         if( startPos < searchStr.getLength() )
     347           0 :             newStartPos = FindPosInSeq_Impl( offset, startPos );
     348             :         else
     349           0 :             newStartPos = in_str.getLength();
     350             : 
     351           0 :         if( endPos )
     352           0 :             newEndPos = FindPosInSeq_Impl( offset, endPos );
     353             :         // Search the transliterated text with the corrected positions.
     354           0 :         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
     355             : 
     356             :         // Map offsets back to untransliterated string.
     357           0 :         const sal_Int32 nOffsets = offset.getLength();
     358           0 :         if (nOffsets)
     359             :         {
     360             :             // For regex nGroups is the number of groups+1 with group 0 being
     361             :             // the entire match.
     362           0 :             const sal_Int32 nGroups = sres.startOffset.getLength();
     363           0 :             for ( sal_Int32 k = 0; k < nGroups; k++ )
     364             :             { // Mirror image of searchForward(): here the start gets the "one behind the previous character" mapping and the end gets the direct offset[] lookup.
     365           0 :                 const sal_Int32 nStart = sres.startOffset[k];
     366           0 :                 if (nStart > 0)
     367           0 :                     sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
     368             :                 // JP 20.6.2001: end is ever exclusive and then don't return
     369             :                 //               the position of the next character - return the
     370             :                 //               next position behind the last found character!
     371             :                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
     372           0 :                 const sal_Int32 nStop = sres.endOffset[k];
     373           0 :                 if (nStop > 0)
     374           0 :                     sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
     375             :             }
     376           0 :         }
     377             :     }
     378             :     else
     379             :     {
     380           2 :         sres = (this->*fnBackward)( in_str, startPos, endPos );
     381             :     }
     382             : 
     383           2 :     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
     384             :     { // Second pass with the complex transliteration (xTranslit2); skipped for regex searches.
     385           0 :     SearchResult sres2;
     386             : 
     387           0 :     in_str = OUString(searchStr);
     388           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     389             : 
     390           0 :         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
     391             :         // The by-value parameters startPos/endPos are overwritten with the corrected positions from here on.
     392           0 :         if( startPos < searchStr.getLength() )
     393           0 :             startPos = FindPosInSeq_Impl( offset, startPos );
     394             :         else
     395           0 :             startPos = in_str.getLength();
     396             : 
     397           0 :         if( endPos )
     398           0 :             endPos = FindPosInSeq_Impl( offset, endPos );
     399             : 
     400           0 :     bUsePrimarySrchStr = false; // the search helpers now use sSrchStr2 / pJumpTable2
     401           0 :     sres2 = (this->*fnBackward)( in_str, startPos, endPos );
     402             :         // NOTE(review): unlike the first pass, the lookups below are not clamped to offset.getLength() - confirm the result offsets can never exceed it.
     403           0 :         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
     404             :         {
     405           0 :             if (sres2.startOffset[k])
     406           0 :                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
     407           0 :             if (sres2.endOffset[k])
     408           0 :                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
     409             :         }
     410             : 
     411             :     // Pick the last and longest one: sres2 wins if the primary pass has subRegExpressions == 0, if sres2 has the larger startOffset, or if the starts are equal and its endOffset is smaller.
     412           0 :     if ( sres.subRegExpressions == 0 )
     413           0 :         return sres2;
     414           0 :     if ( sres2.subRegExpressions == 1 )
     415             :     {
     416           0 :         if ( sres.startOffset[0] < sres2.startOffset[0] )
     417           0 :             return sres2;
     418           0 :         if ( sres.startOffset[0] == sres2.startOffset[0] &&
     419           0 :         sres.endOffset[0] > sres2.endOffset[0] )
     420           0 :             return sres2;
     421           0 :     }
     422             :     }
     423             : 
     424           4 :     return sres;
     425             : }
     426             : 
     427             : 
     428             : 
     429           0 : bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const // true unless rStr[nPos] is classified as digit, alpha or letter
     430             : { // The character 0x7f is always reported as a delimiter, without consulting the character classification.
     431           0 :     bool bRet = true;
     432           0 :     if( '\x7f' != rStr[nPos])
     433             :     {
     434           0 :         if ( !xCharClass.is() ) // the classification service is created on first use
     435           0 :              xCharClass = CharacterClassification::create( m_xContext );
     436           0 :         sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
     437           0 :                 aSrchPara.Locale );
     438           0 :         if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
     439           0 :                         KCharacterType::LETTER ) & nCType ) )
     440           0 :             bRet = false;
     441             :     }
     442           0 :     return bRet;
     443             : }
     444             : 
     445             : // --------- helper methods for Boyer-Moore like text searching ----------
     446             : // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
     447             : 
     448         438 : void TextSearch::MakeForwardTab() // builds pJumpTable for forward searching with sSrchStr, unless a forward table already exists
     449             : {
     450             :     // create the jumptable for the search text
     451         438 :     if( pJumpTable )
     452             :     {
     453         368 :         if( bIsForwardTab )
     454         806 :             return ;                                        // the jumpTable is ok
     455           0 :         delete pJumpTable; // existing table was built for backward search; replaced below
     456             :     }
     457          70 :     bIsForwardTab = true;
     458             : 
     459          70 :     sal_Int32 n, nLen = sSrchStr.getLength();
     460          70 :     pJumpTable = new TextSearchJumpTable;
     461             :     // Every character except the last one is entered with its distance to the end of the search string.
     462         482 :     for( n = 0; n < nLen - 1; ++n )
     463             :     {
     464         412 :         sal_Unicode cCh = sSrchStr[n];
     465         412 :         sal_Int32 nDiff = nLen - n - 1;
     466         412 :     TextSearchJumpTable::value_type aEntry( cCh, nDiff );
     467             : 
     468             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     469         412 :             pJumpTable->insert( aEntry );
     470         412 :         if ( !aPair.second ) // character already present: the later occurrence (smaller distance) overwrites it
     471         132 :             (*(aPair.first)).second = nDiff;
     472             :     }
     473             : }
     474             : 
     475           0 : void TextSearch::MakeForwardTab2() // same as MakeForwardTab(), but fills pJumpTable2 from sSrchStr2
     476             : {
     477             :     // create the jumptable for the search text
     478           0 :     if( pJumpTable2 )
     479             :     {
     480           0 :         if( bIsForwardTab ) // NOTE(review): this flag is shared with pJumpTable; if the primary table was rebuilt for this direction first, a pJumpTable2 built for the other direction would be kept - confirm call order
     481           0 :             return ;                                        // the jumpTable is ok
     482           0 :         delete pJumpTable2;
     483             :     }
     484           0 :     bIsForwardTab = true;
     485             : 
     486           0 :     sal_Int32 n, nLen = sSrchStr2.getLength();
     487           0 :     pJumpTable2 = new TextSearchJumpTable;
     488             :     // Every character except the last one is entered with its distance to the end of the search string.
     489           0 :     for( n = 0; n < nLen - 1; ++n )
     490             :     {
     491           0 :         sal_Unicode cCh = sSrchStr2[n];
     492           0 :         sal_Int32 nDiff = nLen - n - 1;
     493             : 
     494           0 :     TextSearchJumpTable::value_type aEntry( cCh, nDiff );
     495             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     496           0 :             pJumpTable2->insert( aEntry );
     497           0 :         if ( !aPair.second ) // character already present: the later occurrence (smaller distance) overwrites it
     498           0 :             (*(aPair.first)).second = nDiff;
     499             :     }
     500             : }
     501             : 
     502           0 : void TextSearch::MakeBackwardTab() // builds pJumpTable for backward searching with sSrchStr, unless a backward table already exists
     503             : {
     504             :     // create the jumptable for the search text
     505           0 :     if( pJumpTable )
     506             :     {
     507           0 :         if( !bIsForwardTab )
     508           0 :             return ;                                        // the jumpTable is ok
     509           0 :         delete pJumpTable; // existing table was built for forward search; replaced below
     510             :     }
     511           0 :     bIsForwardTab = false;
     512             : 
     513           0 :     sal_Int32 n, nLen = sSrchStr.getLength();
     514           0 :     pJumpTable = new TextSearchJumpTable;
     515             :     // Every character except the first one is entered with its index, walking from the end towards the front.
     516           0 :     for( n = nLen-1; n > 0; --n )
     517             :     {
     518           0 :         sal_Unicode cCh = sSrchStr[n];
     519           0 :         TextSearchJumpTable::value_type aEntry( cCh, n );
     520             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     521           0 :             pJumpTable->insert( aEntry );
     522           0 :         if ( !aPair.second ) // character already present: the occurrence with the smaller index overwrites it
     523           0 :             (*(aPair.first)).second = n;
     524             :     }
     525             : }
     526             : 
     527           0 : void TextSearch::MakeBackwardTab2() // same as MakeBackwardTab(), but fills pJumpTable2 from sSrchStr2
     528             : {
     529             :     // create the jumptable for the search text
     530           0 :     if( pJumpTable2 )
     531             :     {
     532           0 :         if( !bIsForwardTab ) // NOTE(review): this flag is shared with pJumpTable; if MakeBackwardTab() already cleared it, a forward-built pJumpTable2 would be kept - confirm call order
     533           0 :             return ;                                        // the jumpTable is ok
     534           0 :         delete pJumpTable2;
     535             :     }
     536           0 :     bIsForwardTab = false;
     537             : 
     538           0 :     sal_Int32 n, nLen = sSrchStr2.getLength();
     539           0 :     pJumpTable2 = new TextSearchJumpTable;
     540             :     // Every character except the first one is entered with its index, walking from the end towards the front.
     541           0 :     for( n = nLen-1; n > 0; --n )
     542             :     {
     543           0 :         sal_Unicode cCh = sSrchStr2[n];
     544           0 :         TextSearchJumpTable::value_type aEntry( cCh, n );
     545             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     546           0 :             pJumpTable2->insert( aEntry );
     547           0 :         if ( !aPair.second ) // character already present: the occurrence with the smaller index overwrites it
     548           0 :             (*(aPair.first)).second = n;
     549             :     }
     550             : }
     551             : 
     552        1824 : sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
     553             : {
                           // Returns the jump distance for character cChr: the value stored for it
                           // in the active jump table, or the full length of the active search
                           // string if the character has no entry.
     554             :     TextSearchJumpTable *pJump;
     555        1824 :     OUString sSearchKey;
     556             : 
                           // bUsePrimarySrchStr selects the table / search string pair to use.
     557        1824 :     if ( bUsePrimarySrchStr ) {
     558        1824 :       pJump = pJumpTable;
     559        1824 :       sSearchKey = sSrchStr;
     560             :     } else {
     561           0 :       pJump = pJumpTable2;
     562           0 :       sSearchKey = sSrchStr2;
     563             :     }
     564             : 
                           // NOTE(review): pJump is dereferenced without a null check; NSrchFrwrd()
                           // and NSrchBkwrd() call a Make*Tab() builder before calling this.
     565        1824 :     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
     566        1824 :     if ( iLook == pJump->end() )
     567        1530 :         return sSearchKey.getLength();
     568         294 :     return (*iLook).second;
     569             : }
     570             : 
     571             : 
     572         796 : SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     573             :         throw(RuntimeException)
     574             : {
                           // Forward plain-text search using the jump table.
                           // Looks for the active search key (sSrchStr or sSrchStr2, chosen by
                           // bUsePrimarySrchStr) in searchStr, trying candidate positions from
                           // startPos on; a match has to end at or before endPos.
                           // Returns a SearchResult with subRegExpressions == 1, startOffset[0] ==
                           // index of the first matched character and endOffset[0] == index one
                           // past the last matched character; subRegExpressions == 0 if nothing
                           // was found.
     575         796 :     SearchResult aRet;
     576         796 :     aRet.subRegExpressions = 0;
     577             : 
     578        1592 :     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
     579             : 
     580        1592 :     OUString aStr( searchStr );
     581         796 :     sal_Int32 nSuchIdx = aStr.getLength();
     582         796 :     sal_Int32 nEnde = endPos;
                           // nothing to find in an empty text, with an empty key, or with a key
                           // longer than the text
     583         796 :     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
     584         358 :         return aRet;
     585             : 
     586             : 
     587         438 :     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
     588           0 :         return aRet;
     589             : 
                           // nEnde becomes the last index at which a match may start
     590         438 :     nEnde -= sSearchKey.getLength();
     591             : 
     592         438 :     if (bUsePrimarySrchStr)
     593         438 :       MakeForwardTab();                   // create the jumptable
     594             :     else
     595           0 :       MakeForwardTab2();
     596             : 
                           // After each failed candidate the position advances by the jump
                           // distance GetDiff() reports for the text character aligned with the
                           // last character of the key.
                           // NOTE(review): that read assumes endPos <= aStr.getLength() and
                           // startPos >= 0; neither is checked in this function.
     597        2262 :     for (sal_Int32 nCmpIdx = startPos; // start position for the search
     598             :             nCmpIdx <= nEnde;
     599        1824 :             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
     600             :     {
     601             :         // skip candidates whose start (checkCTLStart) or end (checkCTLEnd) is not a cell start per isCellStart()
     602        2082 :         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
     603           0 :                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
     604           0 :             continue;
     605             : 
                               // compare key and text from the last key character backwards
     606        2082 :         nSuchIdx = sSearchKey.getLength() - 1;
     607        6396 :         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
     608             :         {
     609        2490 :             if( nSuchIdx == 0 )
     610             :             {
                                       // every character matched; with NORM_WORD_ONLY the match
                                       // additionally has to be delimited on both sides
     611         258 :                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
     612             :                 {
     613           0 :                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
     614           0 :                     bool bAtStart = !nCmpIdx;
     615           0 :                     bool bAtEnd = nFndEnd == endPos;
     616           0 :                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
     617           0 :                     bool bDelimBehind = bAtEnd || IsDelimiter(  aStr, nFndEnd );
     618             :                     //  *       1 -> only one word in the paragraph
     619             :                     //  *       2 -> at begin of paragraph
     620             :                     //  *       3 -> at end of paragraph
     621             :                     //  *       4 -> inside the paragraph
                                           // not a whole word: break leaves only the comparison
                                           // loop, the outer loop then moves to the next candidate
     622           0 :                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
     623           0 :                                 ( bAtStart && bDelimBehind ) ||     // 2
     624           0 :                                 ( bAtEnd && bDelimBefore ) ||       // 3
     625           0 :                                 ( bDelimBefore && bDelimBehind )))  // 4
     626             :                         break;
     627             :                 }
     628             : 
     629         258 :                 aRet.subRegExpressions = 1;
     630         258 :                 aRet.startOffset.realloc( 1 );
     631         258 :                 aRet.startOffset[ 0 ] = nCmpIdx;
     632         258 :                 aRet.endOffset.realloc( 1 );
     633         258 :                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
     634             : 
     635         258 :                 return aRet;
     636             :             }
     637             :             else
     638        2232 :                 nSuchIdx--;
     639             :         }
     640             :     }
     641         180 :     return aRet;
     642             : }
     643             : 
     644           0 : SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     645             :         throw(RuntimeException)
     646             : {
                           // Backward plain-text search using the jump table.
                           // nCmpIdx is the index one past the last character of the current
                           // candidate, i.e. the candidate occupies
                           // [nCmpIdx - key length, nCmpIdx). Candidates are tried from startPos
                           // downwards while nCmpIdx >= nEnde.
                           // On success the result is reported inverted: startOffset[0] is the
                           // index one past the match, endOffset[0] is the index of its first
                           // character. subRegExpressions stays 0 if nothing was found.
     647           0 :     SearchResult aRet;
     648           0 :     aRet.subRegExpressions = 0;
     649             : 
     650           0 :     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
     651             : 
     652           0 :     OUString aStr( searchStr );
     653           0 :     sal_Int32 nSuchIdx = aStr.getLength();
     654           0 :     sal_Int32 nEnde = endPos;
                           // nothing to find in an empty text, with an empty key, or with a key
                           // longer than the text
     655           0 :     if( nSuchIdx == 0 || sSearchKey.isEmpty() || sSearchKey.getLength() > nSuchIdx)
     656           0 :         return aRet;
     657             : 
     658           0 :     if (bUsePrimarySrchStr)
     659           0 :       MakeBackwardTab();                      // create the jumptable
     660             :     else
     661           0 :       MakeBackwardTab2();
     662             : 
                           // nSuchIdx still holds the text length here
     663           0 :     if( nEnde == nSuchIdx )                 // end position for the search
     664           0 :         nEnde = sSearchKey.getLength();
     665             :     else
     666           0 :         nEnde += sSearchKey.getLength();
     667             : 
     668           0 :     sal_Int32 nCmpIdx = startPos;          // start position for the search
     669             : 
     670           0 :     while (nCmpIdx >= nEnde)
     671             :     {
     672             :         // only compare candidates whose start (checkCTLStart) and end (checkCTLEnd) are cell starts per isCellStart()
     673           0 :         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
     674           0 :                         sSearchKey.getLength() )) && (!checkCTLEnd ||
     675           0 :                     isCellStart( aStr, nCmpIdx)))
     676             :         {
                                   // compare key and text from the first key character forwards
     677           0 :             nSuchIdx = 0;
     678           0 :             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
     679           0 :                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
     680           0 :                 nSuchIdx++;
                                   // the whole key matched
     681           0 :             if( nSuchIdx >= sSearchKey.getLength() )
     682             :             {
     683           0 :                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
     684             :                 {
     685           0 :                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
     686           0 :                     bool bAtStart = !nFndStt;
     687           0 :                     bool bAtEnd = nCmpIdx == startPos;
     688           0 :                     bool bDelimBehind = bAtEnd || IsDelimiter( aStr, nCmpIdx );
     689           0 :                     bool bDelimBefore = bAtStart || // begin of paragraph
     690           0 :                         IsDelimiter( aStr, nFndStt-1 );
     691             :                     //  *       1 -> only one word in the paragraph
     692             :                     //  *       2 -> at begin of paragraph
     693             :                     //  *       3 -> at end of paragraph
     694             :                     //  *       4 -> inside the paragraph
                                           // a match that is not a whole word falls through to the
                                           // jump below and the search continues
     695           0 :                     if( ( bAtStart && bAtEnd ) ||           // 1
     696           0 :                             ( bAtStart && bDelimBehind ) ||     // 2
     697           0 :                             ( bAtEnd && bDelimBefore ) ||       // 3
     698           0 :                             ( bDelimBefore && bDelimBehind ))   // 4
     699             :                     {
     700           0 :                         aRet.subRegExpressions = 1;
     701           0 :                         aRet.startOffset.realloc( 1 );
     702           0 :                         aRet.startOffset[ 0 ] = nCmpIdx;
     703           0 :                         aRet.endOffset.realloc( 1 );
     704           0 :                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
     705           0 :                         return aRet;
     706             :                     }
     707             :                 }
     708             :                 else
     709             :                 {
     710           0 :                     aRet.subRegExpressions = 1;
     711           0 :                     aRet.startOffset.realloc( 1 );
     712           0 :                     aRet.startOffset[ 0 ] = nCmpIdx;
     713           0 :                     aRet.endOffset.realloc( 1 );
     714           0 :                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
     715           0 :                     return aRet;
     716             :                 }
     717             :             }
     718             :         }
                               // jump distance for the text character aligned with the first key
                               // character; nSuchIdx is reused to hold it. Give up if the jump
                               // would move nCmpIdx below 0.
     719           0 :         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
     720           0 :         if( nCmpIdx < nSuchIdx )
     721           0 :             return aRet;
     722           0 :         nCmpIdx -= nSuchIdx;
     723             :     }
     724           0 :     return aRet;
     725             : }
     726             : 
     727          34 : void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions)
     728             : {
                           // Compiles the regular expression for later RESrchFrwrd()/RESrchBkwrd()
                           // calls and stores it in pRegexMatcher; pRegexMatcher is NULL afterwards
                           // if ICU reported a non-zero error code.
                           // NOTE(review): pRegexMatcher is overwritten here without deleting a
                           // previous matcher - presumably the caller releases it first; confirm.
     729             :     // select the transliterated pattern string
     730             :     const OUString& rPatternStr =
     731          34 :         (rOptions.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
     732          34 :         : ((rOptions.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : rOptions.searchString);
     733             : 
     734          34 :     sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
     735             :     // map com::sun::star::util::SearchFlags to ICU uregex.h flags
     736             :     // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
     737             :     // REG_NEWLINE is neither properly defined nor used anywhere => not implemented
     738             :     // REG_NOSUB is not used anywhere => not implemented
     739             :     // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
     740             :     // LEV_RELAXED is only used for SearchAlgorithm==Approximate
     741             :     // Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
     742             :     // probably because the transliteration flag IGNORE_CASE handles it as well.
     743          34 :     if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0
     744          22 :     ||  (rOptions.transliterateFlags & TransliterationModules_IGNORE_CASE) != 0)
     745          16 :         nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
     746          34 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     747             :     // assumption: transliteration didn't mangle regexp control chars
     748          34 :     IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
     749             : #ifndef DISABLE_WORDBOUND_EMULATION
     750             :     // for convenience, specific syntax elements of the old regex engine are emulated
     751             :     // - by replacing \< with "word-break followed by a look-ahead word-char"
                           // NOTE(review): the matchers below are function-local statics that are
                           // reset and reused on every call; whether concurrent calls can happen
                           // is not visible here.
     752          34 :     static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
     753          34 :     static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
     754          34 :     static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
     755          34 :     aChevronMatcherB.reset( aIcuSearchPatStr);
     756          34 :     aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
     757          34 :     aChevronMatcherB.reset();
     758             :     // - by replacing \> with "look-behind word-char followed by a word-break"
     759          34 :     static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
     760          34 :     static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
     761          34 :     static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
     762          34 :     aChevronMatcherE.reset( aIcuSearchPatStr);
     763          34 :     aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
     764          34 :     aChevronMatcherE.reset();
     765             : #endif
                           // nIcuErr accumulates over all ICU calls above, so a failure in the
                           // chevron emulation also discards the matcher created here.
                           // NOTE(review): any non-zero UErrorCode is treated as an error; ICU
                           // warning codes are non-zero as well (U_FAILURE() would not treat
                           // them as failures) - confirm this is intended.
     766          34 :     pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
     767          34 :     if (nIcuErr)
     768             :     {
     769             :         SAL_INFO( "i18npool", "TextSearch::RESrchPrepare UErrorCode " << nIcuErr);
     770           0 :         delete pRegexMatcher;
     771           0 :         pRegexMatcher = NULL;
     772             :     }
     773             :     else
     774             :     {
     775             :         // Pathological patterns may result in exponential run time making the
     776             :         // application appear to be frozen. Limit that. Documentation for this
     777             :         // call says
     778             :         // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
     779             :         // "The units of the limit are steps of the match engine.
     780             :         // Correspondence with actual processor time will depend on the speed
     781             :         // of the processor and the details of the specific pattern, but will
     782             :         // typically be on the order of milliseconds."
     783             :         // Just what is a good value? 42 is always an answer ... the 23 enigma
     784             :         // as well.. which on the dev's machine is roughly 50 seconds with the
     785             :         // pattern of fdo#70627.
     786             :         /* TODO: make this a configuration settable value and possibly take
     787             :          * complexity of expression into account and maybe even length of text
     788             :          * to be matched; currently (2013-11-25) that is at most one 64k
     789             :          * paragraph per RESrchFrwrd()/RESrchBkwrd() call. */
                               // the error code set by setTimeLimit() is not inspected afterwards
     790          34 :         pRegexMatcher->setTimeLimit( 23*1000, nIcuErr);
     791          34 :     }
     792          34 : }
     793             : 
     794             : 
     795             : 
     796        1584 : static bool lcl_findRegex( RegexMatcher * pRegexMatcher, sal_Int32 nStartPos, UErrorCode & rIcuErr )
     797             : {
                           // Runs pRegexMatcher->find() from nStartPos and returns whether a match
                           // was found. rIcuErr is handed to ICU and is only logged here; callers
                           // see a plain false both for "no match" and for an ICU error.
     798        1584 :     if (!pRegexMatcher->find( nStartPos, rIcuErr))
     799             :     {
     800             :         /* TODO: future versions could pass the UErrorCode or translations
     801             :          * thereof to the caller, for example to inform the user of
     802             :          * U_REGEX_TIME_OUT. The strange thing though is that an error is set
     803             :          * only after the second call that returns immediately and not if
     804             :          * timeout occurred on the first call?!? */
     805             :         SAL_INFO( "i18npool", "lcl_findRegex UErrorCode " << rIcuErr);
     806         276 :         return false;
     807             :     }
     808        1308 :     return true;
     809             : }
     810             : 
     811        1578 : SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
     812             :                                       sal_Int32 startPos, sal_Int32 endPos )
     813             :             throw(RuntimeException)
     814             : {
                           // Forward regular-expression search with the matcher prepared by
                           // RESrchPrepare(). Returns the first non-empty match found from
                           // startPos on (or an empty match located exactly at endPos).
                           // Result: subRegExpressions == group count + 1; index 0 of
                           // startOffset/endOffset describes the whole match, index i the i-th
                           // capture group. subRegExpressions == 0 if there is no matcher or no
                           // match.
     815        1578 :     SearchResult aRet;
     816        1578 :     aRet.subRegExpressions = 0;
     817        1578 :     if( !pRegexMatcher)
     818           0 :         return aRet;
     819             : 
                           // clamp endPos to the text length
     820        1578 :     if( endPos > searchStr.getLength())
     821           0 :         endPos = searchStr.getLength();
     822             : 
     823             :     // use the ICU RegexMatcher to find the matches
     824        1578 :     UErrorCode nIcuErr = U_ZERO_ERROR;
                           // the matcher only gets the first endPos code units of searchStr
     825        3156 :     const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), endPos);
     826        1578 :     pRegexMatcher->reset( aSearchTargetStr);
     827             :     // search until there is a valid match
     828             :     for(;;)
     829             :     {
     830        1578 :         if (!lcl_findRegex( pRegexMatcher, startPos, nIcuErr))
     831         274 :             return aRet;
     832             : 
     833             :         // #i118887# ignore zero-length matches e.g. "a*" in "bc"
     834        1304 :         int nStartOfs = pRegexMatcher->start( nIcuErr);
     835        1304 :         int nEndOfs = pRegexMatcher->end( nIcuErr);
     836        1304 :         if( nStartOfs < nEndOfs)
     837        1304 :             break;
     838             :         // If the zero-length match is behind the string, do not match it again
     839             :         // and again until startPos reaches there. A match behind the string is
     840             :         // a "$" anchor.
     841           0 :         if (nStartOfs == endPos)
     842           0 :             break;
     843             :         // try at next position if there was a zero-length match
     844           0 :         if( ++startPos >= endPos)
     845           0 :             return aRet;
     846           0 :     }
     847             : 
     848             :     // extract the result of the search
     849        1304 :     const int nGroupCount = pRegexMatcher->groupCount();
     850        1304 :     aRet.subRegExpressions = nGroupCount + 1;
     851        1304 :     aRet.startOffset.realloc( aRet.subRegExpressions);
     852        1304 :     aRet.endOffset.realloc( aRet.subRegExpressions);
     853        1304 :     aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
     854        1304 :     aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
     855        1308 :     for( int i = 1; i <= nGroupCount; ++i) {
     856           4 :         aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
     857           4 :         aRet.endOffset[i]   = pRegexMatcher->end( i, nIcuErr);
     858             :     }
     859             : 
     860        1304 :     return aRet;
     861             : }
     862             : 
     863           2 : SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
     864             :                                       sal_Int32 startPos, sal_Int32 endPos )
     865             :             throw(RuntimeException)
     866             : {
                           // Backward regular-expression search, emulated with repeated forward
                           // searches: matches are enumerated from endPos upwards inside the first
                           // startPos code units of searchStr and the last non-empty one is
                           // reported.
                           // The result offsets are inverted (startOffset holds the match end,
                           // endOffset the match start), for the whole match and for each group.
     867             :     // NOTE: for backwards search callers provide startPos/endPos inverted!
     868           2 :     SearchResult aRet;
     869           2 :     aRet.subRegExpressions = 0;
     870           2 :     if( !pRegexMatcher)
     871           0 :         return aRet;
     872             : 
                           // clamp startPos (the upper bound here) to the text length
     873           2 :     if( startPos > searchStr.getLength())
     874           0 :         startPos = searchStr.getLength();
     875             : 
     876             :     // use the ICU RegexMatcher to find the matches
     877             :     // TODO: use ICU's backward searching once it becomes available
     878             :     //       as its replacement using forward search is not as good as the real thing
     879           2 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     880           4 :     const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), startPos);
     881           2 :     pRegexMatcher->reset( aSearchTargetStr);
     882           2 :     if (!lcl_findRegex( pRegexMatcher, endPos, nIcuErr))
     883           0 :         return aRet;
     884             : 
     885             :     // find the last match
     886           2 :     int nLastPos = 0;
     887           2 :     int nFoundEnd = 0;
     888           2 :     int nGoodPos = 0, nGoodEnd = 0;
     889           2 :     bool bFirst = true;
     890           2 :     do {
     891           2 :         nLastPos = pRegexMatcher->start( nIcuErr);
     892           2 :         nFoundEnd = pRegexMatcher->end( nIcuErr);
     893           2 :         if (nLastPos < nFoundEnd)
     894             :         {
     895             :             // remember last non-zero-length match
     896           2 :             nGoodPos = nLastPos;
     897           2 :             nGoodEnd = nFoundEnd;
     898             :         }
                               // a match reaching startPos cannot be followed by a later one
     899           2 :         if( nFoundEnd >= startPos)
     900           0 :             break;
     901           2 :         bFirst = false;
                               // after a zero-length match continue one position further on
     902           2 :         if( nFoundEnd == nLastPos)
     903           0 :             ++nFoundEnd;
     904           2 :     } while( lcl_findRegex( pRegexMatcher, nFoundEnd, nIcuErr));
     905             : 
     906             :     // Ignore all zero-length matches except "$" anchor on first match.
                           // nGoodPos == nGoodEnd means no non-empty match was recorded above
     907           2 :     if (nGoodPos == nGoodEnd)
     908             :     {
     909           0 :         if (bFirst && nLastPos == startPos)
     910           0 :             nGoodPos = nLastPos;
     911             :         else
     912           0 :             return aRet;
     913             :     }
     914             : 
     915             :     // find last match again to get its details
                           // (the return value of this repeated find is not checked)
     916           2 :     lcl_findRegex( pRegexMatcher, nGoodPos, nIcuErr);
     917             : 
     918             :     // fill in the details of the last match
     919           2 :     const int nGroupCount = pRegexMatcher->groupCount();
     920           2 :     aRet.subRegExpressions = nGroupCount + 1;
     921           2 :     aRet.startOffset.realloc( aRet.subRegExpressions);
     922           2 :     aRet.endOffset.realloc( aRet.subRegExpressions);
     923             :     // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
     924           2 :     aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
     925           2 :     aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
     926           6 :     for( int i = 1; i <= nGroupCount; ++i) {
     927           4 :         aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
     928           4 :         aRet.endOffset[i]   = pRegexMatcher->start( i, nIcuErr);
     929             :     }
     930             : 
     931           2 :     return aRet;
     932             : }
     933             : 
     934             : 
     935             : 
     936             : // search for words phonetically
     937           0 : SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
     938             :                                           sal_Int32 startPos, sal_Int32 endPos )
     939             :             throw(RuntimeException)
     940             : {
                           // Forward approximate search: walks word by word (word boundaries come
                           // from xBreak) from startPos towards endPos and reports the first word,
                           // clipped to [startPos, endPos), for which pWLD->WLD() returns a value
                           // <= nLimit.
                           // Result: subRegExpressions == 1 with startOffset[0]/endOffset[0] set
                           // to the clipped word start/end, or subRegExpressions == 0 if xBreak
                           // is not set or no word qualifies.
                           // NOTE(review): pWLD is dereferenced without a null check; presumably
                           // it is set up together with the approximate search options - confirm.
     941           0 :     SearchResult aRet;
     942           0 :     aRet.subRegExpressions = 0;
     943             : 
     944           0 :     if( !xBreak.is() )
     945           0 :         return aRet;
     946             : 
     947           0 :     OUString aWTemp( searchStr );
     948             : 
     949             :     sal_Int32 nStt, nEnd;
     950             : 
     951           0 :     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
     952             :             aSrchPara.Locale,
     953           0 :             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
     954             : 
     955           0 :     do
     956             :     {
                               // the word starts at or behind the end of the search range
     957           0 :         if( aWBnd.startPos >= endPos )
     958           0 :             break;
                               // clip the word to the search range
     959           0 :         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
     960           0 :         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
     961             : 
     962           0 :         if( nStt < nEnd &&
     963           0 :                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
     964             :         {
     965           0 :             aRet.subRegExpressions = 1;
     966           0 :             aRet.startOffset.realloc( 1 );
     967           0 :             aRet.startOffset[ 0 ] = nStt;
     968           0 :             aRet.endOffset.realloc( 1 );
     969           0 :             aRet.endOffset[ 0 ] = nEnd;
     970           0 :             break;
     971             :         }
     972             : 
                               // ask for the word following the last position of the current one
     973           0 :         nStt = nEnd - 1;
     974           0 :         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
     975           0 :                 WordType::ANYWORD_IGNOREWHITESPACES);
     976           0 :     } while( aWBnd.startPos != aWBnd.endPos ||
     977           0 :             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
     978             :     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
     979             :     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
     980             :     // and nextWord() does also => don't loop forever.
     981           0 :     return aRet;
     982             : }
     983             : 
     984           0 : SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
     985             :                                           sal_Int32 startPos, sal_Int32 endPos )
     986             :             throw(RuntimeException)
     987             : {
                           // Backward approximate search: walks word by word (word boundaries come
                           // from xBreak) from startPos down towards endPos and reports the first
                           // word, clipped to [endPos, startPos), for which pWLD->WLD() returns a
                           // value <= nLimit. Here startPos is the upper and endPos the lower
                           // bound of the range.
                           // The result offsets are inverted: startOffset[0] is the clipped word
                           // end, endOffset[0] the clipped word start. subRegExpressions stays 0
                           // if xBreak is not set or no word qualifies.
                           // NOTE(review): pWLD is dereferenced without a null check; presumably
                           // it is set up together with the approximate search options - confirm.
     988           0 :     SearchResult aRet;
     989           0 :     aRet.subRegExpressions = 0;
     990             : 
     991           0 :     if( !xBreak.is() )
     992           0 :         return aRet;
     993             : 
     994           0 :     OUString aWTemp( searchStr );
     995             : 
     996             :     sal_Int32 nStt, nEnd;
     997             : 
     998           0 :     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
     999             :             aSrchPara.Locale,
    1000           0 :             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
    1001             : 
    1002           0 :     do
    1003             :     {
                               // the word ends at or before the lower bound of the search range
    1004           0 :         if( aWBnd.endPos <= endPos )
    1005           0 :             break;
                               // clip the word to the search range
    1006           0 :         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
    1007           0 :         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
    1008             : 
    1009           0 :         if( nStt < nEnd &&
    1010           0 :                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
    1011             :         {
    1012           0 :             aRet.subRegExpressions = 1;
    1013           0 :             aRet.startOffset.realloc( 1 );
    1014           0 :             aRet.startOffset[ 0 ] = nEnd;
    1015           0 :             aRet.endOffset.realloc( 1 );
    1016           0 :             aRet.endOffset[ 0 ] = nStt;
    1017           0 :             break;
    1018             :         }
                               // position 0 reached, there is no previous word to ask for
    1019           0 :         if( !nStt )
    1020           0 :             break;
    1021             : 
    1022           0 :         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
    1023           0 :                 WordType::ANYWORD_IGNOREWHITESPACES);
    1024           0 :     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
    1025           0 :     return aRet;
    1026             : }
    1027             : 
    1028             : 
    1029             : static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; // service name, see getServiceName_Static()
    1030             : static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; // implementation name, also compared in i18nsearch_component_getFactory()
    1031             : 
    1032          20 : static OUString getServiceName_Static()
    1033             : {
                           // Returns cSearchName as an OUString.
    1034          20 :     return OUString::createFromAscii( cSearchName );
    1035             : }
    1036             : 
    1037          20 : static OUString getImplementationName_Static()
    1038             : {
                           // Returns cSearchImpl as an OUString.
    1039          20 :     return OUString::createFromAscii( cSearchImpl );
    1040             : }
    1041             : 
    1042             : OUString SAL_CALL
    1043           0 : TextSearch::getImplementationName()
    1044             :                 throw( RuntimeException, std::exception )
    1045             : {
                           // Reports the implementation name, i.e. the value of cSearchImpl.
    1046           0 :     return getImplementationName_Static();
    1047             : }
    1048             : 
    1049           0 : sal_Bool SAL_CALL TextSearch::supportsService(const OUString& rServiceName)
    1050             :                 throw( RuntimeException, std::exception )
    1051             : {
                           // Delegates the check for rServiceName to the cppu::supportsService()
                           // helper, passing this object.
    1052           0 :     return cppu::supportsService(this, rServiceName);
    1053             : }
    1054             : 
    1055             : Sequence< OUString > SAL_CALL
    1056           0 : TextSearch::getSupportedServiceNames(void) throw( RuntimeException, std::exception )
    1057             : {
                           // Returns a one-element sequence holding the service name cSearchName.
    1058           0 :     Sequence< OUString > aRet(1);
    1059           0 :     aRet[0] = getServiceName_Static();
    1060           0 :     return aRet;
    1061             : }
    1062             : 
    1063             : ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
    1064         112 : SAL_CALL TextSearch_CreateInstance(
    1065             :         const ::com::sun::star::uno::Reference<
    1066             :         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
    1067             : {
                           // Creation function handed to cppu::createSingleFactory() in
                           // i18nsearch_component_getFactory(): allocates a new TextSearch with the
                           // component context obtained from rxMSF and returns it as XInterface.
    1068             :     return ::com::sun::star::uno::Reference<
    1069             :         ::com::sun::star::uno::XInterface >(
    1070             :                 (::cppu::OWeakObject*) new TextSearch(
    1071         112 :                         comphelper::getComponentContext( rxMSF ) ) );
    1072             : }
    1073             : 
    1074             : extern "C"
    1075             : {
    1076             : SAL_DLLPUBLIC_EXPORT void* SAL_CALL
    1077          20 : i18nsearch_component_getFactory( const sal_Char* sImplementationName,
    1078             :                                  void* _pServiceManager,
    1079             :                                  SAL_UNUSED_PARAMETER void* )
    1080             : {
    1081          20 :     void* pRet = NULL;
    1082             : 
    1083             :     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
    1084             :         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* >
    1085          20 :             ( _pServiceManager );
    1086             :     ::com::sun::star::uno::Reference<
    1087          20 :             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
    1088             : 
    1089          20 :     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
    1090             :     {
    1091          20 :         ::com::sun::star::uno::Sequence< OUString > aServiceNames(1);
    1092          20 :         aServiceNames[0] = getServiceName_Static();
    1093          40 :         xFactory = ::cppu::createSingleFactory(
    1094             :                 pServiceManager, getImplementationName_Static(),
    1095          40 :                 &TextSearch_CreateInstance, aServiceNames );
    1096             :     }
    1097             : 
    1098          20 :     if ( xFactory.is() )
    1099             :     {
    1100          20 :         xFactory->acquire();
    1101          20 :         pRet = xFactory.get();
    1102             :     }
    1103             : 
    1104          20 :     return pRet;
    1105             : }
    1106             : 
    1107             : } // extern "C"
    1108             : 
    1109             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10