LCOV - code coverage report
Current view: top level - i18npool/source/search - textsearch.cxx (source / functions) Hit Total Coverage
Test: commit e02a6cb2c3e2b23b203b422e4e0680877f232636 Lines: 0 551 0.0 %
Date: 2014-04-14 Functions: 0 29 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "textsearch.hxx"
      21             : #include "levdis.hxx"
      22             : #include <com/sun/star/lang/Locale.hpp>
      23             : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
      24             : #include <comphelper/processfactory.hxx>
      25             : #include <com/sun/star/i18n/BreakIterator.hpp>
      26             : #include <com/sun/star/i18n/UnicodeType.hpp>
      27             : #include <com/sun/star/util/SearchFlags.hpp>
      28             : #include <com/sun/star/i18n/WordType.hpp>
      29             : #include <com/sun/star/i18n/ScriptType.hpp>
      30             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      31             : #include <com/sun/star/i18n/CharacterClassification.hpp>
      32             : #include <com/sun/star/i18n/KCharacterType.hpp>
      33             : #include <com/sun/star/i18n/Transliteration.hpp>
      34             : #include <com/sun/star/registry/XRegistryKey.hpp>
      35             : #include <cppuhelper/factory.hxx>
      36             : #include <cppuhelper/supportsservice.hxx>
      37             : #include <cppuhelper/weak.hxx>
      38             : 
      39             : #ifdef _MSC_VER
      40             : // get rid of that dumb compiler warning
      41             : // identifier was truncated to '255' characters in the debug information
      42             : // for STL template usage, if .pdb files are to be created
      43             : #pragma warning( disable: 4786 )
      44             : #endif
      45             : 
      46             : #include <string.h>
      47             : 
      48             : using namespace ::com::sun::star::util;
      49             : using namespace ::com::sun::star::uno;
      50             : using namespace ::com::sun::star::lang;
      51             : using namespace ::com::sun::star::i18n;
      52             : using namespace ::com::sun::star;
      53             : // Japanese "ignore..." transliteration modules; part of the flag set that setOptions() loads into xTranslit2.
      54             : static const sal_Int32 COMPLEX_TRANS_MASK_TMP =
      55             :     TransliterationModules_ignoreBaFa_ja_JP |
      56             :     TransliterationModules_ignoreIterationMark_ja_JP |
      57             :     TransliterationModules_ignoreTiJi_ja_JP |
      58             :     TransliterationModules_ignoreHyuByu_ja_JP |
      59             :     TransliterationModules_ignoreSeZe_ja_JP |
      60             :     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
      61             :     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
      62             :     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
      63             : 
      64             : // IGNORE_KANA and FULLWIDTH_HALFWIDTH are simple transliterations, but they need
      65             : // to take effect in the complex transliteration, so they are added to its mask.
      66             : static const sal_Int32 COMPLEX_TRANS_MASK =
      67             :     COMPLEX_TRANS_MASK_TMP |
      68             :     TransliterationModules_IGNORE_KANA |
      69             :     TransliterationModules_FULLWIDTH_HALFWIDTH;
      70             : // Every flag bit outside COMPLEX_TRANS_MASK; setOptions() loads this subset into xTranslit.
      71             : static const sal_Int32 SIMPLE_TRANS_MASK = ~COMPLEX_TRANS_MASK;
      72             : // Mask setOptions() applies when it transliterates a regular-expression search pattern.
      73             : // Regex patterns are case sensitive, so the case-mapping modules are excluded as well.
      74             : static const sal_Int32 SIMPLE_TRANS_MASK_REPATTERN =
      75             :     ~(COMPLEX_TRANS_MASK |
      76             :             TransliterationModules_IGNORE_CASE |
      77             :             TransliterationModules_UPPERCASE_LOWERCASE |
      78             :             TransliterationModules_LOWERCASE_UPPERCASE);
      79             : 
      80             : // Constructs a searcher bound to rxContext. The raw-pointer helpers start out
                       // null, and default options (SearchAlgorithms_ABSOLUTE with the
                       // ALL_IGNORE_CASE search flag) are installed through setOptions().
      81           0 : TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
      82             :         : m_xContext( rxContext )
      83             :         , pJumpTable( 0 )
      84             :         , pJumpTable2( 0 )
      85             :         , pRegexMatcher( NULL )
      86           0 :         , pWLD( 0 )
      87             : {
      88           0 :     SearchOptions aOpt;
      89           0 :     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
      90           0 :     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
      91             :     // aOpt.Locale is not assigned here (open question in the original: "aOpt.Locale = ???").
      92           0 :     setOptions( aOpt );
      93           0 : }
      94             : // Frees the helper objects held through raw pointers (pRegexMatcher, pWLD and both jump tables).
      95           0 : TextSearch::~TextSearch()
      96             : {
      97           0 :     delete pRegexMatcher;
      98           0 :     delete pWLD;
      99           0 :     delete pJumpTable;
     100           0 :     delete pJumpTable2;
     101           0 : }
     102             : // Stores rOptions and rebuilds every piece of search state derived from it:
                       // drops the cached helpers, (re)loads the transliteration modules,
                       // transliterates the search string, and selects the forward/backward
                       // search routines that match the requested algorithm.
     103           0 : void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException, std::exception )
     104             : {
     105           0 :     aSrchPara = rOptions;
     106             :     // Discard helpers built for the previous options; "delete p, p = 0" frees and then resets p.
     107           0 :     delete pRegexMatcher, pRegexMatcher = NULL;
     108           0 :     delete pWLD, pWLD = 0;
     109           0 :     delete pJumpTable, pJumpTable = 0;
     110           0 :     delete pJumpTable2, pJumpTable2 = 0;
     111             : 
     112             :     // Create (or reuse) the transliterator for the simple flags; release it when none is set.
     113           0 :     if( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
     114             :     {
     115           0 :         if( !xTranslit.is() )
     116           0 :             xTranslit.set( Transliteration::create( m_xContext ) );
     117           0 :         xTranslit->loadModule(
     118           0 :              (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK ),
     119           0 :              aSrchPara.Locale);
     120             :     }
     121           0 :     else if( xTranslit.is() )
     122           0 :         xTranslit = 0;
     123             :     // NOTE(review): unlike xTranslit, xTranslit2 is not released when no complex flag is set - confirm intended.
     124             :     // Create Transliteration for 2<->1, 2<->2 transliteration
     125           0 :     if ( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
     126             :     {
     127           0 :         if( !xTranslit2.is() )
     128           0 :             xTranslit2.set( Transliteration::create( m_xContext ) );
     129             :         // Load transliteration module
     130           0 :         xTranslit2->loadModule(
     131           0 :              (TransliterationModules)( aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK ),
     132           0 :              aSrchPara.Locale);
     133             :     }
     134             :     // The break iterator is created on first use and kept for later calls.
     135           0 :     if ( !xBreak.is() )
     136           0 :         xBreak = com::sun::star::i18n::BreakIterator::create( m_xContext );
     137             :     // Start from the untransliterated search string; the branches below may replace it.
     138           0 :     sSrchStr = aSrchPara.searchString;
     139             : 
     140             :     // Transliterate search string.
     141           0 :     if (aSrchPara.algorithmType == SearchAlgorithms_REGEXP)
     142             :     {
     143           0 :         if (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK_REPATTERN)
     144             :         {
     145           0 :             if ((aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK_REPATTERN) !=
     146             :                     (aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK))
     147             :             {   // The pattern needs a different module set than xTranslit has loaded: use a temporary transliterator.
     148             :                 com::sun::star::uno::Reference< XExtendedTransliteration > xTranslitPattern(
     149           0 :                          Transliteration::create( m_xContext ));
     150           0 :                 if (xTranslitPattern.is())
     151             :                 {
     152           0 :                     xTranslitPattern->loadModule(
     153           0 :                             (TransliterationModules)( aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK_REPATTERN ),
     154           0 :                             aSrchPara.Locale);
     155           0 :                     sSrchStr = xTranslitPattern->transliterateString2String(
     156           0 :                             aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     157           0 :                 }
     158             :             }
     159             :             else
     160             :             {   // Both masks select the same flags, so the already loaded xTranslit can be reused.
     161           0 :                 if (xTranslit.is())
     162           0 :                     sSrchStr = xTranslit->transliterateString2String(
     163           0 :                             aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     164             :             }
     165             :             // xTranslit2 complex transliterated sSrchStr2 is not used in
     166             :             // regex, see TextSearch::searchForward() and
     167             :             // TextSearch::searchBackward()
     168             :         }
     169             :     }
     170             :     else
     171             :     {   // Non-regex search: keep one search key per transliterator (sSrchStr and sSrchStr2).
     172           0 :         if ( xTranslit.is() && aSrchPara.transliterateFlags & SIMPLE_TRANS_MASK )
     173           0 :             sSrchStr = xTranslit->transliterateString2String(
     174           0 :                     aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     175             : 
     176           0 :         if ( xTranslit2.is() && aSrchPara.transliterateFlags & COMPLEX_TRANS_MASK )
     177           0 :             sSrchStr2 = xTranslit2->transliterateString2String(
     178           0 :                     aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     179             :     }
     180             : 
     181             :     // When start or end of search string is a complex script type, we need to
     182             :     // make sure the result boundary is not located in the middle of cell.
     183           0 :     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
     184           0 :                 ScriptType::COMPLEX));
     185           0 :     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
     186           0 :                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
     187             :     // NOTE(review): for an empty sSrchStr the index passed above is -1 - confirm getScriptType() tolerates it.
     188           0 :     switch( aSrchPara.algorithmType)
     189             :     {
     190             :         case SearchAlgorithms_REGEXP:
     191           0 :             fnForward = &TextSearch::RESrchFrwrd;
     192           0 :             fnBackward = &TextSearch::RESrchBkwrd;
     193           0 :             RESrchPrepare( aSrchPara);
     194           0 :             break;
     195             : 
     196             :         case SearchAlgorithms_APPROXIMATE:
     197           0 :             fnForward = &TextSearch::ApproxSrchFrwrd;
     198           0 :             fnBackward = &TextSearch::ApproxSrchBkwrd;
     199             :             // Build the WLevDistance helper from the option values and cache its limit in nLimit.
     200           0 :             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
     201             :                     aSrchPara.insertedChars, aSrchPara.deletedChars,
     202           0 :                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
     203             : 
     204           0 :             nLimit = pWLD->GetLimit();
     205           0 :             break;
     206             : 
     207             :         default:    // every other algorithm value uses the NSrch* routines
     208           0 :             fnForward = &TextSearch::NSrchFrwrd;
     209           0 :             fnBackward = &TextSearch::NSrchBkwrd;
     210           0 :             break;
     211             :     }
     212           0 : }
     213             : // Returns the index of the first entry of rOff that is >= nPos (linear scan from
                       // index 0), or rOff.getLength() when every entry is smaller than nPos.
                       // searchForward()/searchBackward() use it to translate positions through the
                       // offset sequence filled by transliterate().
     214           0 : sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
     215             : {
     216           0 :     sal_Int32 nRet = 0, nEnd = rOff.getLength();
     217           0 :     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
     218           0 :     return nRet;
     219             : }
     220             : // Returns true when nPos is exactly the position that xBreak->previousCharacters()
                       // reports after stepping back one SKIPCELL unit from nPos+1 in searchStr.
     221           0 : sal_Bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
     222             :         throw( RuntimeException )
     223             : {
     224             :     sal_Int32 nDone;    // handed to previousCharacters(); not read afterwards
     225           0 :     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
     226           0 :             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
     227             : }
     228             : // Searches searchStr forward between startPos and endPos with the routine chosen
                       // in setOptions(). With xTranslit loaded, the search runs on the transliterated
                       // text and the result offsets are mapped back to the original string. For
                       // non-regex searches with xTranslit2 loaded, a second search runs on the
                       // xTranslit2 transliteration, and that result is returned instead when the
                       // first search found nothing, or when it starts earlier, or starts at the
                       // same offset with a larger endOffset.
     229           0 : SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     230             :         throw( RuntimeException, std::exception )
     231             : {
     232           0 :     SearchResult sres;
     233             : 
     234           0 :     OUString in_str(searchStr);
     235           0 :     sal_Int32 newStartPos = startPos;
     236           0 :     sal_Int32 newEndPos = endPos;
     237             :     // First pass uses sSrchStr and pJumpTable (see GetDiff()).
     238           0 :     bUsePrimarySrchStr = true;
     239             : 
     240           0 :     if ( xTranslit.is() )
     241             :     {
     242             :         // apply normal transliteration (1<->1, 1<->0)
     243           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     244           0 :         in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
     245             : 
     246             :         // JP 20.6.2001: also the start and end positions must be corrected!
     247           0 :         if( startPos )
     248           0 :             newStartPos = FindPosInSeq_Impl( offset, startPos );
     249             : 
     250           0 :         if( endPos < searchStr.getLength() )
     251           0 :             newEndPos = FindPosInSeq_Impl( offset, endPos );
     252             :         else
     253           0 :             newEndPos = in_str.getLength();
     254             : 
     255           0 :         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
     256             : 
     257             :         // Map offsets back to untransliterated string.
     258           0 :         const sal_Int32 nOffsets = offset.getLength();
     259           0 :         if (nOffsets)
     260             :         {
     261             :             // For regex nGroups is the number of groups+1 with group 0 being
     262             :             // the entire match.
     263           0 :             const sal_Int32 nGroups = sres.startOffset.getLength();
     264           0 :             for ( sal_Int32 k = 0; k < nGroups; k++ )
     265             :             {
     266           0 :                 const sal_Int32 nStart = sres.startOffset[k];
     267           0 :                 if (nStart > 0)   // positions at or past the end of offset map to one past its last entry
     268           0 :                     sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
     269             :                 // JP 20.6.2001: end is ever exclusive and then don't return
     270             :                 //               the position of the next character - return the
     271             :                 //               next position behind the last found character!
     272             :                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
     273           0 :                 const sal_Int32 nStop = sres.endOffset[k];
     274           0 :                 if (nStop > 0)    // index is clamped to the last entry of offset
     275           0 :                     sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
     276             :             }
     277           0 :         }
     278             :     }
     279             :     else
     280             :     {
     281           0 :         sres = (this->*fnForward)( in_str, startPos, endPos );
     282             :     }
     283             :     // Second pass with the complex transliteration; skipped for regex searches.
     284           0 :     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
     285             :     {
     286           0 :         SearchResult sres2;
     287             : 
     288           0 :         in_str = OUString(searchStr);
     289           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     290             : 
     291           0 :         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
     292             :         // The by-value parameters startPos/endPos are overwritten with the translated positions here.
     293           0 :         if( startPos )
     294           0 :             startPos = FindPosInSeq_Impl( offset, startPos );
     295             : 
     296           0 :         if( endPos < searchStr.getLength() )
     297           0 :             endPos = FindPosInSeq_Impl( offset, endPos );
     298             :         else
     299           0 :             endPos = in_str.getLength();
     300             :         // Switch the search routine to sSrchStr2 / pJumpTable2.
     301           0 :     bUsePrimarySrchStr = false;
     302           0 :         sres2 = (this->*fnForward)( in_str, startPos, endPos );
     303             :         // NOTE(review): unlike the first pass, the indices into offset are not clamped to its length here.
     304           0 :         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
     305             :         {
     306           0 :             if (sres2.startOffset[k])
     307           0 :           sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
     308           0 :             if (sres2.endOffset[k])
     309           0 :           sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
     310             :         }
     311             : 
     312             :     // pick first and long one
     313           0 :     if ( sres.subRegExpressions == 0)
     314           0 :         return sres2;
     315           0 :     if ( sres2.subRegExpressions == 1)
     316             :     {
     317           0 :         if ( sres.startOffset[0] > sres2.startOffset[0])
     318           0 :             return sres2;
     319           0 :         else if ( sres.startOffset[0] == sres2.startOffset[0] &&
     320           0 :             sres.endOffset[0] < sres2.endOffset[0])
     321           0 :             return sres2;
     322           0 :     }
     323             :     }
     324             : 
     325           0 :     return sres;
     326             : }
     327             : // Backward counterpart of searchForward(): runs the routine stored in fnBackward
                       // on the xTranslit-transliterated text (if any) and maps the result offsets
                       // back to the original string. For non-regex searches with xTranslit2 loaded,
                       // a second search runs on the xTranslit2 transliteration, and that result is
                       // returned instead when the first search found nothing, or when it has a
                       // larger startOffset, or the same startOffset with a smaller endOffset.
     328           0 : SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     329             :         throw(RuntimeException, std::exception)
     330             : {
     331           0 :     SearchResult sres;
     332             : 
     333           0 :     OUString in_str(searchStr);
     334           0 :     sal_Int32 newStartPos = startPos;
     335           0 :     sal_Int32 newEndPos = endPos;
     336             :     // First pass uses sSrchStr and pJumpTable (see GetDiff()).
     337           0 :     bUsePrimarySrchStr = true;
     338             : 
     339           0 :     if ( xTranslit.is() )
     340             :     {
     341             :         // apply only simple 1<->1 transliteration here
     342           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     343           0 :     in_str = xTranslit->transliterate( searchStr, 0, in_str.getLength(), offset );
     344             : 
     345             :         // JP 20.6.2001: also the start and end positions must be corrected!
     346           0 :         if( startPos < searchStr.getLength() )
     347           0 :             newStartPos = FindPosInSeq_Impl( offset, startPos );
     348             :         else
     349           0 :             newStartPos = in_str.getLength();
     350             : 
     351           0 :         if( endPos )
     352           0 :             newEndPos = FindPosInSeq_Impl( offset, endPos );
     353             : 
     354           0 :         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
     355             : 
     356             :         // Map offsets back to untransliterated string.
     357           0 :         const sal_Int32 nOffsets = offset.getLength();
     358           0 :         if (nOffsets)
     359             :         {
     360             :             // For regex nGroups is the number of groups+1 with group 0 being
     361             :             // the entire match.
     362           0 :             const sal_Int32 nGroups = sres.startOffset.getLength();
     363           0 :             for ( sal_Int32 k = 0; k < nGroups; k++ )
     364             :             {
     365           0 :                 const sal_Int32 nStart = sres.startOffset[k];
     366           0 :                 if (nStart > 0)   // index is clamped to the last entry of offset
     367           0 :                     sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
     368             :                 // JP 20.6.2001: end is ever exclusive and then don't return
     369             :                 //               the position of the next character - return the
     370             :                 //               next position behind the last found character!
     371             :                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
     372           0 :                 const sal_Int32 nStop = sres.endOffset[k];
     373           0 :                 if (nStop > 0)    // positions at or past the end of offset map to one past its last entry
     374           0 :                     sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
     375             :             }
     376           0 :         }
     377             :     }
     378             :     else
     379             :     {
     380           0 :         sres = (this->*fnBackward)( in_str, startPos, endPos );
     381             :     }
     382             :     // Second pass with the complex transliteration; skipped for regex searches.
     383           0 :     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
     384             :     {
     385           0 :     SearchResult sres2;
     386             : 
     387           0 :     in_str = OUString(searchStr);
     388           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     389             : 
     390           0 :         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
     391             :         // The by-value parameters startPos/endPos are overwritten with the translated positions here.
     392           0 :         if( startPos < searchStr.getLength() )
     393           0 :             startPos = FindPosInSeq_Impl( offset, startPos );
     394             :         else
     395           0 :             startPos = in_str.getLength();
     396             : 
     397           0 :         if( endPos )
     398           0 :             endPos = FindPosInSeq_Impl( offset, endPos );
     399             :         // Switch the search routine to sSrchStr2 / pJumpTable2.
     400           0 :     bUsePrimarySrchStr = false;
     401           0 :     sres2 = (this->*fnBackward)( in_str, startPos, endPos );
     402             :         // NOTE(review): unlike the first pass, the indices into offset are not clamped to its length here.
     403           0 :         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
     404             :         {
     405           0 :             if (sres2.startOffset[k])
     406           0 :                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
     407           0 :             if (sres2.endOffset[k])
     408           0 :                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
     409             :         }
     410             : 
     411             :     // pick last and long one
     412           0 :     if ( sres.subRegExpressions == 0 )
     413           0 :         return sres2;
     414           0 :     if ( sres2.subRegExpressions == 1 )
     415             :     {
     416           0 :         if ( sres.startOffset[0] < sres2.startOffset[0] )
     417           0 :             return sres2;
     418           0 :         if ( sres.startOffset[0] == sres2.startOffset[0] &&
     419           0 :         sres.endOffset[0] > sres2.endOffset[0] )
     420           0 :             return sres2;
     421           0 :     }
     422             :     }
     423             : 
     424           0 :     return sres;
     425             : }
     426             : 
     427             : 
     428             : // Returns false when the character at rStr[nPos] is classified as DIGIT, ALPHA or
                       // LETTER for aSrchPara.Locale, and true otherwise. The character 0x7f is
                       // reported as a delimiter without consulting the classifier.
     429           0 : bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
     430             : {
     431           0 :     bool bRet = true;
     432           0 :     if( '\x7f' != rStr[nPos])
     433             :     {
     434           0 :         if ( !xCharClass.is() )    // classifier is created on first use, inside this const method
     435           0 :              xCharClass = CharacterClassification::create( m_xContext );
     436           0 :         sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
     437           0 :                 aSrchPara.Locale );
     438           0 :         if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
     439           0 :                         KCharacterType::LETTER ) & nCType ) )
     440           0 :             bRet = false;
     441             :     }
     442           0 :     return bRet;
     443             : }
     444             : 
     445             : // --------- helper methods for Boyer-Moore like text searching ----------
     446             : // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
     447             : // Builds pJumpTable for forward searching of sSrchStr unless a forward table
                       // already exists. Each character except the last one is mapped to its distance
                       // from the last character (nLen - n - 1); for repeated characters the later
                       // occurrence overwrites the earlier one, so the smallest distance is kept.
     448           0 : void TextSearch::MakeForwardTab()
     449             : {
     450             :     // create the jumptable for the search text
     451           0 :     if( pJumpTable )
     452             :     {
     453           0 :         if( bIsForwardTab )
     454           0 :             return ;                                        // the jumpTable is ok
     455           0 :         delete pJumpTable;      // existing table was built for backward search; rebuilt below
     456             :     }
     457           0 :     bIsForwardTab = true;
     458             : 
     459           0 :     sal_Int32 n, nLen = sSrchStr.getLength();
     460           0 :     pJumpTable = new TextSearchJumpTable;
     461             : 
     462           0 :     for( n = 0; n < nLen - 1; ++n )
     463             :     {
     464           0 :         sal_Unicode cCh = sSrchStr[n];
     465           0 :         sal_Int32 nDiff = nLen - n - 1;
     466           0 :     TextSearchJumpTable::value_type aEntry( cCh, nDiff );
     467             : 
     468             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     469           0 :             pJumpTable->insert( aEntry );
     470           0 :         if ( !aPair.second )    // character already present: overwrite with the newer distance
     471           0 :             (*(aPair.first)).second = nDiff;
     472             :     }
     473             : }
     474             : // Same as MakeForwardTab(), but builds pJumpTable2 from sSrchStr2.
                       // NOTE(review): the direction flag bIsForwardTab is the same member that
                       // MakeForwardTab()/MakeBackwardTab() use for pJumpTable - confirm that the
                       // early return below cannot keep a pJumpTable2 built for the other direction.
     475           0 : void TextSearch::MakeForwardTab2()
     476             : {
     477             :     // create the jumptable for the search text
     478           0 :     if( pJumpTable2 )
     479             :     {
     480           0 :         if( bIsForwardTab )
     481           0 :             return ;                                        // the jumpTable is ok
     482           0 :         delete pJumpTable2;
     483             :     }
     484           0 :     bIsForwardTab = true;
     485             : 
     486           0 :     sal_Int32 n, nLen = sSrchStr2.getLength();
     487           0 :     pJumpTable2 = new TextSearchJumpTable;
     488             : 
     489           0 :     for( n = 0; n < nLen - 1; ++n )
     490             :     {
     491           0 :         sal_Unicode cCh = sSrchStr2[n];
     492           0 :         sal_Int32 nDiff = nLen - n - 1;    // distance from position n to the last character
     493             : 
     494           0 :     TextSearchJumpTable::value_type aEntry( cCh, nDiff );
     495             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     496           0 :             pJumpTable2->insert( aEntry );
     497           0 :         if ( !aPair.second )    // character already present: overwrite with the newer distance
     498           0 :             (*(aPair.first)).second = nDiff;
     499             :     }
     500             : }
     501             : // Builds pJumpTable for backward searching of sSrchStr unless a backward table
                       // already exists. Walking from the last character down to index 1, each
                       // character is mapped to its index n; for repeated characters the smaller
                       // index overwrites the larger one. The character at index 0 is not entered.
     502           0 : void TextSearch::MakeBackwardTab()
     503             : {
     504             :     // create the jumptable for the search text
     505           0 :     if( pJumpTable )
     506             :     {
     507           0 :         if( !bIsForwardTab )
     508           0 :             return ;                                        // the jumpTable is ok
     509           0 :         delete pJumpTable;      // existing table was built for forward search; rebuilt below
     510             :     }
     511           0 :     bIsForwardTab = false;
     512             : 
     513           0 :     sal_Int32 n, nLen = sSrchStr.getLength();
     514           0 :     pJumpTable = new TextSearchJumpTable;
     515             : 
     516           0 :     for( n = nLen-1; n > 0; --n )
     517             :     {
     518           0 :         sal_Unicode cCh = sSrchStr[n];
     519           0 :         TextSearchJumpTable::value_type aEntry( cCh, n );
     520             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     521           0 :             pJumpTable->insert( aEntry );
     522           0 :         if ( !aPair.second )    // character already present: overwrite with the smaller index
     523           0 :             (*(aPair.first)).second = n;
     524             :     }
     525             : }
     526             : // Same as MakeBackwardTab(), but builds pJumpTable2 from sSrchStr2.
                       // NOTE(review): the direction flag bIsForwardTab is the same member that
                       // MakeForwardTab()/MakeBackwardTab() use for pJumpTable - confirm that the
                       // early return below cannot keep a pJumpTable2 built for the other direction.
     527           0 : void TextSearch::MakeBackwardTab2()
     528             : {
     529             :     // create the jumptable for the search text
     530           0 :     if( pJumpTable2 )
     531             :     {
     532           0 :         if( !bIsForwardTab )
     533           0 :             return ;                                        // the jumpTable is ok
     534           0 :         delete pJumpTable2;
     535             :     }
     536           0 :     bIsForwardTab = false;
     537             : 
     538           0 :     sal_Int32 n, nLen = sSrchStr2.getLength();
     539           0 :     pJumpTable2 = new TextSearchJumpTable;
     540             : 
     541           0 :     for( n = nLen-1; n > 0; --n )
     542             :     {
     543           0 :         sal_Unicode cCh = sSrchStr2[n];
     544           0 :         TextSearchJumpTable::value_type aEntry( cCh, n );
     545             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     546           0 :             pJumpTable2->insert( aEntry );
     547           0 :         if ( !aPair.second )    // character already present: overwrite with the smaller index
     548           0 :             (*(aPair.first)).second = n;
     549             :     }
     550             : }
     551             : 
     552           0 : sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
     553             : {
                           // Returns the skip distance for cChr: the value stored for it in the
                           // active jump table, or the length of the active search string when
                           // the character has no entry. bUsePrimarySrchStr selects between
                           // pJumpTable/sSrchStr and pJumpTable2/sSrchStr2.
                           // The selected table pointer is dereferenced without a null check;
                           // NSrchFrwrd() and NSrchBkwrd() build the table before calling this.
     554             :     TextSearchJumpTable *pJump;
     555           0 :     OUString sSearchKey;
     556             : 
     557           0 :     if ( bUsePrimarySrchStr ) {
     558           0 :       pJump = pJumpTable;
     559           0 :       sSearchKey = sSrchStr;
     560             :     } else {
     561           0 :       pJump = pJumpTable2;
     562           0 :       sSearchKey = sSrchStr2;
     563             :     }
     564             : 
     565           0 :     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
                           // character does not occur in the table: skip a whole key length
     566           0 :     if ( iLook == pJump->end() )
     567           0 :         return sSearchKey.getLength();
     568           0 :     return (*iLook).second;
     569             : }
     570             : 
     571             : 
     572             : // TextSearch::NSrchFrwrd is mis-optimized on unxsoli (#i105945#)
                       // Forward literal search: looks for the active search key (sSrchStr or
                       // sSrchStr2, chosen by bUsePrimarySrchStr) in searchStr, trying match
                       // start positions from startPos up to endPos minus the key length.
                       // Returns a SearchResult whose subRegExpressions is 0 when nothing was
                       // found, or 1 with startOffset[0] / endOffset[0] set to the index of the
                       // first matched character and the index just behind the match.
     573           0 : SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     574             :         throw(RuntimeException)
     575             : {
     576           0 :     SearchResult aRet;
     577           0 :     aRet.subRegExpressions = 0;
     578             : 
     579           0 :     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
     580             : 
     581           0 :     OUString aStr( searchStr );
     582           0 :     sal_Int32 nSuchIdx = aStr.getLength();
     583           0 :     sal_Int32 nEnde = endPos;
                           // nothing to find in an empty text, with an empty key, or with a key
                           // that is longer than the text
     584           0 :     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
     585           0 :         return aRet;
     586             : 
     587             : 
     588           0 :     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
     589           0 :         return aRet;
     590             : 
                           // from here on nEnde is the last admissible match start position
     591           0 :     nEnde -= sSearchKey.getLength();
     592             : 
     593           0 :     if (bUsePrimarySrchStr)
     594           0 :       MakeForwardTab();                   // create the jumptable
     595             :     else
     596           0 :       MakeForwardTab2();
     597             : 
                           // After every candidate (including the ones skipped via continue)
                           // the position advances by the skip distance GetDiff() reports for
                           // the text character aligned with the last key character.
     598           0 :     for (sal_Int32 nCmpIdx = startPos; // start position for the search
     599             :             nCmpIdx <= nEnde;
     600           0 :             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
     601             :     {
     602             :         // if the match would be the completed cells, skip it.
     603           0 :         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
     604           0 :                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
     605           0 :             continue;
     606             : 
                               // compare key and text from the last key character backwards
     607           0 :         nSuchIdx = sSearchKey.getLength() - 1;
     608           0 :         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
     609             :         {
                                   // reaching index 0 with equal characters: the whole key matched
     610           0 :             if( nSuchIdx == 0 )
     611             :             {
     612           0 :                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
     613             :                 {
     614           0 :                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
                                           // bAtStart: match begins at index 0 of the text
                                           // bAtEnd:   match ends exactly at endPos
     615           0 :                     bool bAtStart = !nCmpIdx;
     616           0 :                     bool bAtEnd = nFndEnd == endPos;
     617           0 :                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
     618           0 :                     bool bDelimBehind = IsDelimiter(  aStr, nFndEnd );
     619             :                     //  *       1 -> only one word in the paragraph
     620             :                     //  *       2 -> at begin of paragraph
     621             :                     //  *       3 -> at end of paragraph
     622             :                     //  *       4 -> inside the paragraph
                                           // none of the four cases holds: not a whole word, so
                                           // leave the compare loop and try the next candidate
     623           0 :                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
     624           0 :                                 ( bAtStart && bDelimBehind ) ||     // 2
     625           0 :                                 ( bAtEnd && bDelimBefore ) ||       // 3
     626           0 :                                 ( bDelimBefore && bDelimBehind )))  // 4
     627             :                         break;
     628             :                 }
     629             : 
     630           0 :                 aRet.subRegExpressions = 1;
     631           0 :                 aRet.startOffset.realloc( 1 );
     632           0 :                 aRet.startOffset[ 0 ] = nCmpIdx;
     633           0 :                 aRet.endOffset.realloc( 1 );
     634           0 :                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
     635             : 
     636           0 :                 return aRet;
     637             :             }
     638             :             else
     639           0 :                 nSuchIdx--;
     640             :         }
     641             :     }
     642           0 :     return aRet;
     643             : }
     644             : 
     645           0 : SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     646             :         throw(RuntimeException)
     647             : {
                           // Backward literal search for the active search key (sSrchStr or
                           // sSrchStr2, chosen by bUsePrimarySrchStr). nCmpIdx is the index just
                           // behind the candidate, i.e. the key is compared against the text
                           // range [nCmpIdx - key length, nCmpIdx). On a hit startOffset[0] is
                           // the index behind the match and endOffset[0] the index of its first
                           // character, so the two are inverted relative to NSrchFrwrd().
                           // subRegExpressions stays 0 when nothing was found.
     648           0 :     SearchResult aRet;
     649           0 :     aRet.subRegExpressions = 0;
     650             : 
     651           0 :     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
     652             : 
     653           0 :     OUString aStr( searchStr );
     654           0 :     sal_Int32 nSuchIdx = aStr.getLength();
     655           0 :     sal_Int32 nEnde = endPos;
                           // nothing to find in an empty text, with an empty key, or with a key
                           // that is longer than the text
     656           0 :     if( nSuchIdx == 0 || sSearchKey.isEmpty() || sSearchKey.getLength() > nSuchIdx)
     657           0 :         return aRet;
     658             : 
     659           0 :     if (bUsePrimarySrchStr)
     660           0 :       MakeBackwardTab();                      // create the jumptable
     661             :     else
     662           0 :       MakeBackwardTab2();
     663             : 
                           // nSuchIdx still holds the text length here. nEnde becomes the
                           // smallest value nCmpIdx may take: the key length when endPos equals
                           // the text length, otherwise endPos plus the key length.
     664           0 :     if( nEnde == nSuchIdx )                 // end position for the search
     665           0 :         nEnde = sSearchKey.getLength();
     666             :     else
     667           0 :         nEnde += sSearchKey.getLength();
     668             : 
     669           0 :     sal_Int32 nCmpIdx = startPos;          // start position for the search
     670             : 
     671           0 :     while (nCmpIdx >= nEnde)
     672             :     {
     673             :         // if the match would be the completed cells, skip it.
     674           0 :         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
     675           0 :                         sSearchKey.getLength() )) && (!checkCTLEnd ||
     676           0 :                     isCellStart( aStr, nCmpIdx)))
     677             :         {
                                   // compare key and text window from the first character on
     678           0 :             nSuchIdx = 0;
     679           0 :             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
     680           0 :                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
     681           0 :                 nSuchIdx++;
                                   // the compare loop ran through the whole key: full match
     682           0 :             if( nSuchIdx >= sSearchKey.getLength() )
     683             :             {
     684           0 :                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
     685             :                 {
     686           0 :                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
                                           // bAtStart: match begins at index 0 of the text
                                           // bAtEnd:   match ends exactly at startPos
     687           0 :                     bool bAtStart = !nFndStt;
     688           0 :                     bool bAtEnd = nCmpIdx == startPos;
     689           0 :                     bool bDelimBehind = IsDelimiter( aStr, nCmpIdx );
     690           0 :                     bool bDelimBefore = bAtStart || // begin of paragraph
     691           0 :                         IsDelimiter( aStr, nFndStt-1 );
     692             :                     //  *       1 -> only one word in the paragraph
     693             :                     //  *       2 -> at begin of paragraph
     694             :                     //  *       3 -> at end of paragraph
     695             :                     //  *       4 -> inside the paragraph
     696           0 :                     if( ( bAtStart && bAtEnd ) ||           // 1
     697           0 :                             ( bAtStart && bDelimBehind ) ||     // 2
     698           0 :                             ( bAtEnd && bDelimBefore ) ||       // 3
     699           0 :                             ( bDelimBefore && bDelimBehind ))   // 4
     700             :                     {
     701           0 :                         aRet.subRegExpressions = 1;
     702           0 :                         aRet.startOffset.realloc( 1 );
     703           0 :                         aRet.startOffset[ 0 ] = nCmpIdx;
     704           0 :                         aRet.endOffset.realloc( 1 );
     705           0 :                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
     706           0 :                         return aRet;
     707             :                     }
                                           // not a whole word: fall through to the shift below
     708             :                 }
     709             :                 else
     710             :                 {
     711           0 :                     aRet.subRegExpressions = 1;
     712           0 :                     aRet.startOffset.realloc( 1 );
     713           0 :                     aRet.startOffset[ 0 ] = nCmpIdx;
     714           0 :                     aRet.endOffset.realloc( 1 );
     715           0 :                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
     716           0 :                     return aRet;
     717             :                 }
     718             :             }
     719             :         }
                               // Shift the window towards the text start by the skip distance of
                               // the first character in the current window; give up when that
                               // shift would make nCmpIdx negative.
     720           0 :         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
     721           0 :         if( nCmpIdx < nSuchIdx )
     722           0 :             return aRet;
     723           0 :         nCmpIdx -= nSuchIdx;
     724             :     }
     725           0 :     return aRet;
     726             : }
     727             : 
     728           0 : void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions)
     729             : {
                           // Compiles the regular expression for the given options into
                           // pRegexMatcher. On an ICU error pRegexMatcher is left as NULL, which
                           // RESrchFrwrd() and RESrchBkwrd() treat as no match possible.
                           // NOTE(review): pRegexMatcher is assigned here without releasing a
                           // previous value - presumably the caller deletes it first; verify.
     730             :     // select the transliterated pattern string
     731             :     const OUString& rPatternStr =
     732           0 :         (rOptions.transliterateFlags & SIMPLE_TRANS_MASK) ? sSrchStr
     733           0 :         : ((rOptions.transliterateFlags & COMPLEX_TRANS_MASK) ? sSrchStr2 : rOptions.searchString);
     734             : 
     735           0 :     sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
     736             :     // map com::sun::star::util::SearchFlags to ICU uregex.h flags
     737             :     // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
     738             :     // REG_NEWLINE is neither properly defined nor used anywhere => not implemented
     739             :     // REG_NOSUB is not used anywhere => not implemented
     740             :     // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
     741             :     // LEV_RELAXED is only used for SearchAlgorithm==Approximate
     742             :     // Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
     743             :     // probably because the transliteration flag IGNORE_CASE handles it as well.
     744           0 :     if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0
     745           0 :     ||  (rOptions.transliterateFlags & TransliterationModules_IGNORE_CASE) != 0)
     746           0 :         nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
     747           0 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     748             :     // assumption: transliteration didn't mangle regexp control chars
     749           0 :     IcuUniString aIcuSearchPatStr( (const UChar*)rPatternStr.getStr(), rPatternStr.getLength());
     750             : #ifndef DISABLE_WORDBOUND_EMULATION
     751             :     // for convenience specific syntax elements of the old regex engine are emulated
                           // NOTE(review): the helper matchers below are function-local statics
                           // that are reset and reused on every call, and they are constructed
                           // with the nIcuErr of the first call only - confirm that callers
                           // serialize access to this function.
     752             :     // - by replacing \< with "word-break followed by a look-ahead word-char"
     753           0 :     static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
     754           0 :     static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
     755           0 :     static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
     756           0 :     aChevronMatcherB.reset( aIcuSearchPatStr);
     757           0 :     aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
     758           0 :     aChevronMatcherB.reset();
     759             :     // - by replacing \> with "look-behind word-char followed by a word-break"
     760           0 :     static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
     761           0 :     static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
     762           0 :     static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
     763           0 :     aChevronMatcherE.reset( aIcuSearchPatStr);
     764           0 :     aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
     765           0 :     aChevronMatcherE.reset();
     766             : #endif
     767           0 :     pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
                           // NOTE(review): every non-zero code takes the error branch, not only
                           // failure codes - confirm whether a U_FAILURE() test was intended.
     768           0 :     if (nIcuErr)
     769             :     {
     770             :         SAL_INFO( "i18npool", "TextSearch::RESrchPrepare UErrorCode " << nIcuErr);
     771           0 :         delete pRegexMatcher;
     772           0 :         pRegexMatcher = NULL;
     773             :     }
     774             :     else
     775             :     {
     776             :         // Pathological patterns may result in exponential run time making the
     777             :         // application appear to be frozen. Limit that. Documentation for this
     778             :         // call says
     779             :         // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
     780             :         // "The units of the limit are steps of the match engine.
     781             :         // Correspondence with actual processor time will depend on the speed
     782             :         // of the processor and the details of the specific pattern, but will
     783             :         // typically be on the order of milliseconds."
     784             :         // Just what is a good value? 42 is always an answer ... the 23 enigma
     785             :         // as well.. which on the dev's machine is roughly 50 seconds with the
     786             :         // pattern of fdo#70627.
     787             :         /* TODO: make this a configuration settable value and possibly take
     788             :          * complexity of expression into account and maybe even length of text
     789             :          * to be matched; currently (2013-11-25) that is at most one 64k
     790             :          * paragraph per RESrchFrwrd()/RESrchBkwrd() call. */
     791           0 :         pRegexMatcher->setTimeLimit( 23*1000, nIcuErr);
     792           0 :     }
     793           0 : }
     794             : 
     795             : 
     796             : 
     797           0 : static bool lcl_findRegex( RegexMatcher * pRegexMatcher, sal_Int32 nStartPos, UErrorCode & rIcuErr )
     798             : {
                           // Runs pRegexMatcher->find() from nStartPos and returns whether a
                           // match was found. When find() reports no match the current value of
                           // rIcuErr is logged; rIcuErr is passed on to the caller unchanged by
                           // this helper itself.
     799           0 :     if (!pRegexMatcher->find( nStartPos, rIcuErr))
     800             :     {
     801             :         /* TODO: future versions could pass the UErrorCode or translations
     802             :          * thereof to the caller, for example to inform the user of
     803             :          * U_REGEX_TIME_OUT. The strange thing though is that an error is set
     804             :          * only after the second call that returns immediately and not if
     805             :          * timeout occurred on the first call?!? */
     806             :         SAL_INFO( "i18npool", "lcl_findRegex UErrorCode " << rIcuErr);
     807           0 :         return false;
     808             :     }
     809           0 :     return true;
     810             : }
     811             : 
     812           0 : SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
     813             :                                       sal_Int32 startPos, sal_Int32 endPos )
     814             :             throw(RuntimeException)
     815             : {
                           // Forward regular expression search in searchStr, starting at
                           // startPos and limited to the first endPos characters of the text.
                           // Returns subRegExpressions == 0 when there is no matcher or no
                           // acceptable match; otherwise entry 0 of startOffset/endOffset holds
                           // the whole match and entries 1..groupCount the capture groups.
     816           0 :     SearchResult aRet;
     817           0 :     aRet.subRegExpressions = 0;
     818           0 :     if( !pRegexMatcher)
     819           0 :         return aRet;
     820             : 
                           // clamp endPos to the text length
     821           0 :     if( endPos > searchStr.getLength())
     822           0 :         endPos = searchStr.getLength();
     823             : 
     824             :     // use the ICU RegexMatcher to find the matches
     825           0 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     826           0 :     const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), endPos);
     827           0 :     pRegexMatcher->reset( aSearchTargetStr);
     828             :     // search until there is a valid match
     829             :     for(;;)
     830             :     {
     831           0 :         if (!lcl_findRegex( pRegexMatcher, startPos, nIcuErr))
     832           0 :             return aRet;
     833             : 
     834             :         // #i118887# ignore zero-length matches e.g. "a*" in "bc"
     835           0 :         int nStartOfs = pRegexMatcher->start( nIcuErr);
     836           0 :         int nEndOfs = pRegexMatcher->end( nIcuErr);
                               // a non-empty match is accepted right away
     837           0 :         if( nStartOfs < nEndOfs)
     838           0 :             break;
     839             :         // If the zero-length match is behind the string, do not match it again
     840             :         // and again until startPos reaches there. A match behind the string is
     841             :         // a "$" anchor.
     842           0 :         if (nStartOfs == endPos)
     843           0 :             break;
     844             :         // try at next position if there was a zero-length match
     845           0 :         if( ++startPos >= endPos)
     846           0 :             return aRet;
     847           0 :     }
     848             : 
     849             :     // extract the result of the search
     850           0 :     const int nGroupCount = pRegexMatcher->groupCount();
     851           0 :     aRet.subRegExpressions = nGroupCount + 1;
     852           0 :     aRet.startOffset.realloc( aRet.subRegExpressions);
     853           0 :     aRet.endOffset.realloc( aRet.subRegExpressions);
     854           0 :     aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
     855           0 :     aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
                           // offsets of the capture groups, copied as reported by the matcher
     856           0 :     for( int i = 1; i <= nGroupCount; ++i) {
     857           0 :         aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
     858           0 :         aRet.endOffset[i]   = pRegexMatcher->end( i, nIcuErr);
     859             :     }
     860             : 
     861           0 :     return aRet;
     862             : }
     863             : 
     864           0 : SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
     865             :                                       sal_Int32 startPos, sal_Int32 endPos )
     866             :             throw(RuntimeException)
     867             : {
                           // Backward regular expression search, emulated with repeated forward
                           // finds: the text is cut at startPos, matching begins at endPos, and
                           // the last non-empty match found on the way is the one reported.
                           // Returns subRegExpressions == 0 when there is no matcher or no
                           // acceptable match. In the result every startOffset entry holds the
                           // end of the match or group and every endOffset entry its start.
     868             :     // NOTE: for backwards search callers provide startPos/endPos inverted!
     869           0 :     SearchResult aRet;
     870           0 :     aRet.subRegExpressions = 0;
     871           0 :     if( !pRegexMatcher)
     872           0 :         return aRet;
     873             : 
                           // clamp startPos to the text length
     874           0 :     if( startPos > searchStr.getLength())
     875           0 :         startPos = searchStr.getLength();
     876             : 
     877             :     // use the ICU RegexMatcher to find the matches
     878             :     // TODO: use ICU's backward searching once it becomes available
     879             :     //       as its replacement using forward search is not as good as the real thing
     880           0 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     881           0 :     const IcuUniString aSearchTargetStr( (const UChar*)searchStr.getStr(), startPos);
     882           0 :     pRegexMatcher->reset( aSearchTargetStr);
     883           0 :     if (!lcl_findRegex( pRegexMatcher, endPos, nIcuErr))
     884           0 :         return aRet;
     885             : 
     886             :     // find the last match
     887           0 :     int nLastPos = 0;
     888           0 :     int nFoundEnd = 0;
     889           0 :     int nGoodPos = 0, nGoodEnd = 0;
     890           0 :     bool bFirst = true;
     891           0 :     do {
     892           0 :         nLastPos = pRegexMatcher->start( nIcuErr);
     893           0 :         nFoundEnd = pRegexMatcher->end( nIcuErr);
     894           0 :         if (nLastPos < nFoundEnd)
     895             :         {
     896             :             // remember last non-zero-length match
     897           0 :             nGoodPos = nLastPos;
     898           0 :             nGoodEnd = nFoundEnd;
     899             :         }
                               // the match reaches the end of the cut text: nothing later exists
     900           0 :         if( nFoundEnd >= startPos)
     901           0 :             break;
     902           0 :         bFirst = false;
                               // after a zero-length match continue one position further on
     903           0 :         if( nFoundEnd == nLastPos)
     904           0 :             ++nFoundEnd;
     905           0 :     } while( lcl_findRegex( pRegexMatcher, nFoundEnd, nIcuErr));
     906             : 
                           // nGoodPos == nGoodEnd means that no non-empty match was recorded
     907             :     // Ignore all zero-length matches except "$" anchor on first match.
     908           0 :     if (nGoodPos == nGoodEnd)
     909             :     {
     910           0 :         if (bFirst && nLastPos == startPos)
     911           0 :             nGoodPos = nLastPos;
     912             :         else
     913           0 :             return aRet;
     914             :     }
     915             : 
     916             :     // find last match again to get its details
                           // (the return value of this find is not checked)
     917           0 :     lcl_findRegex( pRegexMatcher, nGoodPos, nIcuErr);
     918             : 
     919             :     // fill in the details of the last match
     920           0 :     const int nGroupCount = pRegexMatcher->groupCount();
     921           0 :     aRet.subRegExpressions = nGroupCount + 1;
     922           0 :     aRet.startOffset.realloc( aRet.subRegExpressions);
     923           0 :     aRet.endOffset.realloc( aRet.subRegExpressions);
     924             :     // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
     925           0 :     aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
     926           0 :     aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
     927           0 :     for( int i = 1; i <= nGroupCount; ++i) {
     928           0 :         aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
     929           0 :         aRet.endOffset[i]   = pRegexMatcher->start( i, nIcuErr);
     930             :     }
     931             : 
     932           0 :     return aRet;
     933             : }
     934             : 
     935             : 
     936             : 
     937             : // search for words phonetically
                       // Forward variant: walks the words of searchStr with the break iterator
                       // xBreak, starting at the word around startPos, and reports the first word
                       // (clamped to [startPos, endPos)) for which pWLD->WLD() returns a value of
                       // at most nLimit. WLD() is presumably the weighted Levenshtein distance
                       // from levdis.hxx - confirm. subRegExpressions stays 0 when no word
                       // qualifies or when xBreak is not set.
     938           0 : SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
     939             :                                           sal_Int32 startPos, sal_Int32 endPos )
     940             :             throw(RuntimeException)
     941             : {
     942           0 :     SearchResult aRet;
     943           0 :     aRet.subRegExpressions = 0;
     944             : 
     945           0 :     if( !xBreak.is() )
     946           0 :         return aRet;
     947             : 
     948           0 :     OUString aWTemp( searchStr );
     949             : 
     950             :     sal_Int32 nStt, nEnd;
     951             : 
     952           0 :     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
     953             :             aSrchPara.Locale,
     954           0 :             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
     955             : 
     956           0 :     do
     957             :     {
                               // the current word begins at or behind endPos: stop searching
     958           0 :         if( aWBnd.startPos >= endPos )
     959           0 :             break;
                               // clamp the word boundaries to the search range
     960           0 :         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
     961           0 :         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
     962             : 
     963           0 :         if( nStt < nEnd &&
     964           0 :                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
     965             :         {
     966           0 :             aRet.subRegExpressions = 1;
     967           0 :             aRet.startOffset.realloc( 1 );
     968           0 :             aRet.startOffset[ 0 ] = nStt;
     969           0 :             aRet.endOffset.realloc( 1 );
     970           0 :             aRet.endOffset[ 0 ] = nEnd;
     971           0 :             break;
     972             :         }
     973             : 
                               // ask for the next word, starting from the last character of the
                               // clamped word just examined
     974           0 :         nStt = nEnd - 1;
     975           0 :         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
     976           0 :                 WordType::ANYWORD_IGNOREWHITESPACES);
     977           0 :     } while( aWBnd.startPos != aWBnd.endPos ||
     978           0 :             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
     979             :     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
     980             :     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
     981             :     // and nextWord() does also => don't loop forever.
     982           0 :     return aRet;
     983             : }
     984             : 
     985           0 : SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
     986             :                                           sal_Int32 startPos, sal_Int32 endPos )
     987             :             throw(RuntimeException)
     988             : {
                           // Backward variant of the approximate word search: walks the words of
                           // searchStr from the word around startPos towards the text start and
                           // reports the first word (clamped to [endPos, startPos)) for which
                           // pWLD->WLD() returns a value of at most nLimit. Here startPos is the
                           // upper and endPos the lower bound, and the result is inverted as
                           // well: startOffset[0] is the word end, endOffset[0] the word start.
                           // subRegExpressions stays 0 when no word qualifies or when xBreak is
                           // not set.
     989           0 :     SearchResult aRet;
     990           0 :     aRet.subRegExpressions = 0;
     991             : 
     992           0 :     if( !xBreak.is() )
     993           0 :         return aRet;
     994             : 
     995           0 :     OUString aWTemp( searchStr );
     996             : 
     997             :     sal_Int32 nStt, nEnd;
     998             : 
     999           0 :     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
    1000             :             aSrchPara.Locale,
    1001           0 :             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
    1002             : 
    1003           0 :     do
    1004             :     {
                               // the current word ends at or before endPos: stop searching
    1005           0 :         if( aWBnd.endPos <= endPos )
    1006           0 :             break;
                               // clamp the word boundaries to the search range
    1007           0 :         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
    1008           0 :         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
    1009             : 
    1010           0 :         if( nStt < nEnd &&
    1011           0 :                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
    1012             :         {
    1013           0 :             aRet.subRegExpressions = 1;
    1014           0 :             aRet.startOffset.realloc( 1 );
    1015           0 :             aRet.startOffset[ 0 ] = nEnd;
    1016           0 :             aRet.endOffset.realloc( 1 );
    1017           0 :             aRet.endOffset[ 0 ] = nStt;
    1018           0 :             break;
    1019             :         }
                               // the clamped word starts at index 0: there is no earlier word
    1020           0 :         if( !nStt )
    1021           0 :             break;
    1022             : 
    1023           0 :         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
    1024           0 :                 WordType::ANYWORD_IGNOREWHITESPACES);
    1025           0 :     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
    1026           0 :     return aRet;
    1027             : }
    1028             : 
    1029             : 
    1030             : static const sal_Char cSearchName[] = "com.sun.star.util.TextSearch"; // service name returned by getServiceName_Static()
    1031             : static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; // implementation name, compared in i18nsearch_component_getFactory()
    1032             : 
    1033           0 : static OUString getServiceName_Static()
    1034             : {
                           // Returns the service name cSearchName as an OUString.
    1035           0 :     return OUString::createFromAscii( cSearchName );
    1036             : }
    1037             : 
    1038           0 : static OUString getImplementationName_Static()
    1039             : {
                           // Returns the implementation name cSearchImpl as an OUString.
    1040           0 :     return OUString::createFromAscii( cSearchImpl );
    1041             : }
    1042             : 
    1043             : OUString SAL_CALL
    1044           0 : TextSearch::getImplementationName()
    1045             :                 throw( RuntimeException, std::exception )
    1046             : {
                           // Member-function wrapper around getImplementationName_Static().
    1047           0 :     return getImplementationName_Static();
    1048             : }
    1049             : 
    1050           0 : sal_Bool SAL_CALL TextSearch::supportsService(const OUString& rServiceName)
    1051             :                 throw( RuntimeException, std::exception )
    1052             : {
                           // Delegates the check for rServiceName to cppu::supportsService(),
                           // passing this object.
    1053           0 :     return cppu::supportsService(this, rServiceName);
    1054             : }
    1055             : 
    1056             : Sequence< OUString > SAL_CALL
    1057           0 : TextSearch::getSupportedServiceNames(void) throw( RuntimeException, std::exception )
    1058             : {
                           // Returns a one-element sequence holding the name from
                           // getServiceName_Static().
    1059           0 :     Sequence< OUString > aRet(1);
    1060           0 :     aRet[0] = getServiceName_Static();
    1061           0 :     return aRet;
    1062             : }
    1063             : 
    1064             : ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
    1065           0 : SAL_CALL TextSearch_CreateInstance(
    1066             :         const ::com::sun::star::uno::Reference<
    1067             :         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
    1068             : {
                           // Creates a new TextSearch from the component context obtained for
                           // rxMSF and returns it as an XInterface reference. This function is
                           // the creation callback handed to cppu::createSingleFactory() in
                           // i18nsearch_component_getFactory().
    1069             :     return ::com::sun::star::uno::Reference<
    1070             :         ::com::sun::star::uno::XInterface >(
    1071             :                 (::cppu::OWeakObject*) new TextSearch(
    1072           0 :                         comphelper::getComponentContext( rxMSF ) ) );
    1073             : }
    1074             : 
    1075             : extern "C"
    1076             : {
    1077             : SAL_DLLPUBLIC_EXPORT void* SAL_CALL // Exported C entry point: returns a factory for the requested implementation, or NULL.
    1078           0 : i18nsearch_component_getFactory( const sal_Char* sImplementationName, // name of the implementation a factory is requested for
    1079             :                                  void* _pServiceManager, // untyped pointer, treated below as an XMultiServiceFactory*
    1080             :                                  SAL_UNUSED_PARAMETER void* ) // third argument is ignored
    1081             : {
    1082           0 :     void* pRet = NULL; // stays NULL when the name does not match or no factory was created
    1083             : 
    1084             :     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
    1085             :         reinterpret_cast< ::com::sun::star::lang::XMultiServiceFactory* > // NOTE(review): source is a void*, so static_cast would presumably suffice -- confirm
    1086           0 :             ( _pServiceManager );
    1087             :     ::com::sun::star::uno::Reference<
    1088           0 :             ::com::sun::star::lang::XSingleServiceFactory > xFactory; // default-constructed; only assigned in the matching-name branch below
    1089             : 
    1090           0 :     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) ) // only the cSearchImpl implementation is served here
    1091             :     {
    1092           0 :         ::com::sun::star::uno::Sequence< OUString > aServiceNames(1);
    1093           0 :         aServiceNames[0] = getServiceName_Static();
    1094           0 :         xFactory = ::cppu::createSingleFactory(
    1095             :                 pServiceManager, getImplementationName_Static(),
    1096           0 :                 &TextSearch_CreateInstance, aServiceNames ); // factory is given TextSearch_CreateInstance as its creation function
    1097             :     }
    1098             : 
    1099           0 :     if ( xFactory.is() )
    1100             :     {
    1101           0 :         xFactory->acquire(); // presumably keeps the factory alive after xFactory goes out of scope; caller would own this reference -- confirm
    1102           0 :         pRet = xFactory.get(); // raw interface pointer handed back through the void* return
    1103             :     }
    1104             : 
    1105           0 :     return pRet;
    1106             : }
    1107             : 
    1108             : } // extern "C"
    1109             : 
    1110             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10