LCOV - commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 - i18npool/source/search/textsearch.cxx

LCOV - code coverage report

Current view:	top level - i18npool/source/search - textsearch.cxx (source / functions)		Hit	Total	Coverage
Test:	commit c8344322a7af75b84dd3ca8f78b05543a976dfd5	Lines:	345	567	60.8 %
Date:	2015-06-13 12:38:46	Functions:	27	35	77.1 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "textsearch.hxx"
      21             : #include "levdis.hxx"
      22             : #include <com/sun/star/lang/Locale.hpp>
      23             : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
      24             : #include <comphelper/processfactory.hxx>
      25             : #include <com/sun/star/i18n/BreakIterator.hpp>
      26             : #include <com/sun/star/i18n/UnicodeType.hpp>
      27             : #include <com/sun/star/util/SearchFlags.hpp>
      28             : #include <com/sun/star/i18n/WordType.hpp>
      29             : #include <com/sun/star/i18n/ScriptType.hpp>
      30             : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
      31             : #include <com/sun/star/i18n/CharacterClassification.hpp>
      32             : #include <com/sun/star/i18n/KCharacterType.hpp>
      33             : #include <com/sun/star/i18n/Transliteration.hpp>
      34             : #include <com/sun/star/registry/XRegistryKey.hpp>
      35             : #include <cppuhelper/factory.hxx>
      36             : #include <cppuhelper/supportsservice.hxx>
      37             : #include <cppuhelper/weak.hxx>
      38             : #include <sal/log.hxx>
      39             : 
      40             : #ifdef _MSC_VER
      41             : // get rid of that dumb compiler warning
      42             : // identifier was truncated to '255' characters in the debug information
      43             : // for STL template usage, if .pdb files are to be created
      44             : #pragma warning( disable: 4786 )
      45             : #endif
      46             : 
      47             : #include <string.h>
      48             : 
      49             : using namespace ::com::sun::star::util;
      50             : using namespace ::com::sun::star::uno;
      51             : using namespace ::com::sun::star::lang;
      52             : using namespace ::com::sun::star::i18n;
      53             : using namespace ::com::sun::star;
      54             : 
                       // Bit mask of the Japanese "ignore" transliteration modules that this file
                       // treats as complex: setOptions() loads the bits selected by this mask into
                       // the second transliterator (xTranslit2), while maskSimpleTrans() strips
                       // them from the flags given to the first one (xTranslit).
      55             : const sal_Int32 COMPLEX_TRANS_MASK =
      56             :     TransliterationModules_ignoreBaFa_ja_JP |
      57             :     TransliterationModules_ignoreIterationMark_ja_JP |
      58             :     TransliterationModules_ignoreTiJi_ja_JP |
      59             :     TransliterationModules_ignoreHyuByu_ja_JP |
      60             :     TransliterationModules_ignoreSeZe_ja_JP |
      61             :     TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
      62             :     TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
      63             :     TransliterationModules_ignoreProlongedSoundMark_ja_JP;
      64             : 
      65             : namespace
      66             : {
                       // File-local helpers that split a transliterateFlags value into the part
                       // handled by the complex transliterator and the part handled by the simple
                       // one.

                       // Returns the module flags for the complex transliterator: the
                       // COMPLEX_TRANS_MASK bits that are set in n, always combined with
                       // IGNORE_KANA and FULLWIDTH_HALFWIDTH.
      67           0 : sal_Int32 maskComplexTrans( sal_Int32 n )
      68             : {
      69             :     // IGNORE_KANA and FULLWIDTH_HALFWIDTH are simple but need to take effect
      70             :     // in complex transliteration.
      71             :     return
      72           0 :         (n & COMPLEX_TRANS_MASK) |                      // all set ignore bits
      73             :         TransliterationModules_IGNORE_KANA |            // plus IGNORE_KANA bit
      74           0 :         TransliterationModules_FULLWIDTH_HALFWIDTH;     // and the FULLWIDTH_HALFWIDTH value
      75             : }
      76             : 
                       // Returns true if n has at least one COMPLEX_TRANS_MASK bit set
                       // (the integer result of the AND converts implicitly to bool).
      77         156 : bool isComplexTrans( sal_Int32 n )
      78             : {
      79         156 :     return n & COMPLEX_TRANS_MASK;
      80             : }
      81             : 
                       // Returns n with every COMPLEX_TRANS_MASK bit cleared.
      82         281 : sal_Int32 maskSimpleTrans( sal_Int32 n )
      83             : {
      84         281 :     return n & ~COMPLEX_TRANS_MASK;
      85             : }
      86             : 
                       // Returns true if any flag remains in n once the complex bits are removed.
      87         218 : bool isSimpleTrans( sal_Int32 n )
      88             : {
      89         218 :     return maskSimpleTrans(n) != 0;
      90             : }
      91             : 
                       // Returns the simple transliteration flags to apply to a regex pattern:
                       //  - from the IGNORE_MASK part of n, IGNORE_CASE is removed;
                       //  - the NON_IGNORE_MASK part of n is kept unless it equals
                       //    UPPERCASE_LOWERCASE or LOWERCASE_UPPERCASE, in which case it becomes 0;
                       //  - COMPLEX_TRANS_MASK bits are cleared from the combined result.
      92             : // Regex patterns are case sensitive.
      93          21 : sal_Int32 maskSimpleRegexTrans( sal_Int32 n )
      94             : {
      95          21 :     sal_Int32 m = (n & TransliterationModules_IGNORE_MASK) & ~TransliterationModules_IGNORE_CASE;
      96          21 :     sal_Int32 v = n & TransliterationModules_NON_IGNORE_MASK;
      97          21 :     if (v == TransliterationModules_UPPERCASE_LOWERCASE || v == TransliterationModules_LOWERCASE_UPPERCASE)
      98           0 :         v = 0;
      99          21 :     return (m | v) & ~COMPLEX_TRANS_MASK;
     100             : }
     101             : 
                       // Returns true if maskSimpleRegexTrans() leaves any flag for a regex pattern.
     102          19 : bool isSimpleRegexTrans( sal_Int32 n )
     103             : {
     104          19 :     return maskSimpleRegexTrans(n) != 0;
     105             : }
                       // End of anonymous namespace (the ';' after the brace is an empty declaration).
     106             : };
     107             : 
                       // Constructs a searcher bound to rxContext. The pointers released by the
                       // destructor (both jump tables, the regex matcher and the WLevDistance
                       // object) start out null; default options are then installed through
                       // setOptions(): ABSOLUTE algorithm with the ALL_IGNORE_CASE search flag.
                       // The Locale of the default options is not set here (see the commented-out
                       // line below).
     108          74 : TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
     109             :         : m_xContext( rxContext )
     110             :         , pJumpTable( 0 )
     111             :         , pJumpTable2( 0 )
     112             :         , pRegexMatcher( NULL )
     113          74 :         , pWLD( 0 )
     114             : {
     115          74 :     SearchOptions aOpt;
     116          74 :     aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
     117          74 :     aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
     118             :     //aOpt.Locale = ???;
     119          74 :     setOptions( aOpt );
     120          74 : }
     121             : 
                       // Releases the regex matcher, the WLevDistance object and both jump tables.
                       // Any of them may still be null; deleting a null pointer is a no-op.
     122         186 : TextSearch::~TextSearch()
     123             : {
     124          62 :     delete pRegexMatcher;
     125          62 :     delete pWLD;
     126          62 :     delete pJumpTable;
     127          62 :     delete pJumpTable2;
     128         124 : }
     129             : 
                       // Installs new search options and rebuilds all state derived from them.
                       //
                       // Steps, in order:
                       //  1. copy rOptions into aSrchPara and delete the cached regex matcher,
                       //     WLevDistance object and both jump tables;
                       //  2. load the simple (xTranslit) and/or complex (xTranslit2) transliterator
                       //     according to aSrchPara.transliterateFlags, and create xBreak if needed;
                       //  3. derive the transliterated search string(s) sSrchStr / sSrchStr2;
                       //  4. record whether the search string starts/ends with a COMPLEX script
                       //     type (checkCTLStart / checkCTLEnd);
                       //  5. select the forward/backward search routines for the algorithm type.
                       //
                       // NOTE(review): xTranslit is released when no simple flags are set, but
                       // xTranslit2 and sSrchStr2 are left untouched when no complex flags are set.
                       // searchForward()/searchBackward() only test xTranslit2.is(), so state from
                       // an earlier setOptions() call may still be used -- confirm this is intended.
     130         146 : void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException, std::exception )
     131             : {
     132         146 :     aSrchPara = rOptions;
     133             : 
                           // Each line below is "delete p" followed, via the comma operator, by
                           // resetting p, so no dangling pointer is left behind.
     134         146 :     delete pRegexMatcher, pRegexMatcher = NULL;
     135         146 :     delete pWLD, pWLD = 0;
     136         146 :     delete pJumpTable, pJumpTable = 0;
     137         146 :     delete pJumpTable2, pJumpTable2 = 0;
     138             : 
     139             :     // Create Transliteration class
     140         146 :     if( isSimpleTrans( aSrchPara.transliterateFlags) )
     141             :     {
     142          62 :         if( !xTranslit.is() )
     143          62 :             xTranslit.set( Transliteration::create( m_xContext ) );
     144          62 :         xTranslit->loadModule(
     145          62 :              (TransliterationModules) maskSimpleTrans( aSrchPara.transliterateFlags),
     146         124 :              aSrchPara.Locale);
     147             :     }
     148          84 :     else if( xTranslit.is() )
     149           0 :         xTranslit = 0;
     150             : 
     151             :     // Create Transliteration for 2<->1, 2<->2 transliteration
     152         146 :     if ( isComplexTrans( aSrchPara.transliterateFlags) )
     153             :     {
     154           0 :         if( !xTranslit2.is() )
     155           0 :             xTranslit2.set( Transliteration::create( m_xContext ) );
     156             :         // Load transliteration module
     157           0 :         xTranslit2->loadModule(
     158           0 :              (TransliterationModules) maskComplexTrans( aSrchPara.transliterateFlags),
     159           0 :              aSrchPara.Locale);
     160             :     }
     161             : 
                           // The break iterator is created once and then reused across calls.
     162         146 :     if ( !xBreak.is() )
     163          74 :         xBreak = com::sun::star::i18n::BreakIterator::create( m_xContext );
     164             : 
                           // Start from the untransliterated search string; it is replaced below
                           // only if a transliteration applies.
     165         146 :     sSrchStr = aSrchPara.searchString;
     166             : 
     167             :     // Transliterate search string.
     168         146 :     if (aSrchPara.algorithmType == SearchAlgorithms_REGEXP)
     169             :     {
     170          19 :         if (isSimpleRegexTrans( aSrchPara.transliterateFlags))
     171             :         {
                                   // The regex flag set differs from the plain simple flag set when
                                   // maskSimpleRegexTrans() removed something (e.g. IGNORE_CASE). The
                                   // pattern is then transliterated with a temporary transliterator
                                   // loaded with the reduced flags instead of with xTranslit.
     172           2 :             if (maskSimpleRegexTrans( aSrchPara.transliterateFlags) !=
     173           1 :                     maskSimpleTrans( aSrchPara.transliterateFlags))
     174             :             {
     175             :                 com::sun::star::uno::Reference< XExtendedTransliteration > xTranslitPattern(
     176           1 :                          Transliteration::create( m_xContext ));
     177           1 :                 if (xTranslitPattern.is())
     178             :                 {
     179           1 :                     xTranslitPattern->loadModule(
     180           1 :                             (TransliterationModules) maskSimpleRegexTrans( aSrchPara.transliterateFlags),
     181           2 :                             aSrchPara.Locale);
     182           3 :                     sSrchStr = xTranslitPattern->transliterateString2String(
     183           2 :                             aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     184           1 :                 }
     185             :             }
     186             :             else
     187             :             {
                                       // Same flags for pattern and text: reuse the shared transliterator.
     188           0 :                 if (xTranslit.is())
     189           0 :                     sSrchStr = xTranslit->transliterateString2String(
     190           0 :                             aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     191             :             }
     192             :             // xTranslit2 complex transliterated sSrchStr2 is not used in
     193             :             // regex, see TextSearch::searchForward() and
     194             :             // TextSearch::searchBackward()
     195             :         }
     196             :     }
     197             :     else
     198             :     {
                               // Non-regex algorithms: primary string from the simple transliterator,
                               // secondary string (sSrchStr2) from the complex one.
     199         127 :         if ( xTranslit.is() && isSimpleTrans( aSrchPara.transliterateFlags) )
     200         159 :             sSrchStr = xTranslit->transliterateString2String(
     201         106 :                     aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     202             : 
     203         127 :         if ( xTranslit2.is() && isComplexTrans( aSrchPara.transliterateFlags) )
     204           0 :             sSrchStr2 = xTranslit2->transliterateString2String(
     205           0 :                     aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
     206             :     }
     207             : 
     208             :     // When start or end of search string is a complex script type, we need to
     209             :     // make sure the result boundary is not located in the middle of cell.
                           // NOTE(review): for an empty sSrchStr the index passed for the end check
                           // is -1 -- confirm getScriptType() tolerates that.
     210         146 :     checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
     211         146 :                 ScriptType::COMPLEX));
     212         292 :     checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
     213         292 :                     sSrchStr.getLength()-1) == ScriptType::COMPLEX));
     214             : 
                           // Bind the member-function pointers used by searchForward() and
                           // searchBackward() to the routines of the selected algorithm.
     215         146 :     switch( aSrchPara.algorithmType)
     216             :     {
     217             :         case SearchAlgorithms_REGEXP:
     218          19 :             fnForward = &TextSearch::RESrchFrwrd;
     219          19 :             fnBackward = &TextSearch::RESrchBkwrd;
     220          19 :             RESrchPrepare( aSrchPara);
     221          19 :             break;
     222             : 
     223             :         case SearchAlgorithms_APPROXIMATE:
     224           0 :             fnForward = &TextSearch::ApproxSrchFrwrd;
     225           0 :             fnBackward = &TextSearch::ApproxSrchBkwrd;
     226             : 
                                   // Built from the search string and the changed/inserted/deleted
                                   // character counts of the options; the last argument is true when
                                   // the LEV_RELAXED search flag is set.
     227           0 :             pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
     228             :                     aSrchPara.insertedChars, aSrchPara.deletedChars,
     229           0 :                     0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
     230             : 
     231           0 :             nLimit = pWLD->GetLimit();
     232           0 :             break;
     233             : 
     234             :         default:
                                   // Every other algorithm type uses the NSrch* routines.
     235         127 :             fnForward = &TextSearch::NSrchFrwrd;
     236         127 :             fnBackward = &TextSearch::NSrchBkwrd;
     237         127 :             break;
     238             :     }
     239         146 : }
     240             : 
                       // Returns the index of the first element of rOff that is not smaller than
                       // nPos, or rOff.getLength() if every element is smaller (linear scan from the
                       // front). The callers in this file pass the offset sequence filled by
                       // transliterate() to turn a position in the original string into an index
                       // into the transliterated string.
     241          12 : sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
     242             : {
     243          12 :     sal_Int32 nRet = 0, nEnd = rOff.getLength();
     244          12 :     while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
     245          12 :     return nRet;
     246             : }
     247             : 
                       // Returns true if stepping back one cell (SKIPCELL mode, count 1) from
                       // nPos+1 in searchStr lands exactly on nPos, i.e. the break iterator reports
                       // nPos as the start of a cell for aSrchPara.Locale.
                       // xBreak is dereferenced without an is() check here.
     248           0 : bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
     249             :         throw( RuntimeException )
     250             : {
                           // Out parameter required by previousCharacters(); its value is not used.
     251             :     sal_Int32 nDone;
     252           0 :     return nPos == xBreak->previousCharacters(searchStr, nPos+1,
     253           0 :             aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
     254             : }
     255             : 
                       // Searches searchStr forward between startPos and endPos with the routine
                       // selected by setOptions() (fnForward).
                       //
                       // If a simple transliterator is loaded, only the requested range is
                       // transliterated, the search runs on that copy, and every returned start/end
                       // offset is mapped back to a position in searchStr through the offset
                       // sequence filled by transliterate(). If a complex transliterator is loaded
                       // and the algorithm is not REGEXP, a second search is run on the
                       // complex-transliterated whole string with bUsePrimarySrchStr cleared, and
                       // the earlier -- or, on equal start, the longer -- match is returned.
                       //
                       // searchStr  text to search in
                       // startPos   position at which the search range begins
                       // endPos     position at which the search range ends
                       // Returns the SearchResult of the selected search routine, with offsets
                       // mapped back as described above.
     256        1109 : SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     257             :         throw( RuntimeException, std::exception )
     258             : {
     259        1109 :     SearchResult sres;
     260             : 
     261        2218 :     OUString in_str(searchStr);
     262             : 
                           // Presumably tells the search routine to use sSrchStr rather than
                           // sSrchStr2 -- the routines reading this flag are outside this view.
     263        1109 :     bUsePrimarySrchStr = true;
     264             : 
     265        1109 :     if ( xTranslit.is() )
     266             :     {
     267             :         // apply normal transliteration (1<->1, 1<->0)
     268         358 :         com::sun::star::uno::Sequence<sal_Int32> offset(endPos - startPos);
     269         358 :         in_str = xTranslit->transliterate( searchStr, startPos, endPos - startPos, offset );
     270             : 
     271             :         // JP 20.6.2001: also the start and end positions must be corrected!
     272             :         sal_Int32 newStartPos =
     273         358 :             (startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
     274             : 
     275         358 :         sal_Int32 newEndPos = (endPos < searchStr.getLength())
     276             :             ? FindPosInSeq_Impl( offset, endPos )
     277         358 :             : in_str.getLength();
     278             : 
     279         358 :         sal_Int32 nExtraOffset = 0;
     280         358 :         if (pRegexMatcher && startPos > 0)
     281             :         {
     282             :             // avoid matching ^ here - in_str omits a prefix of the searchStr
     283             :             // this is a really lame way to do it, but ICU only offers
     284             :             // useAnchoringBounds() to disable *both* bounds but what is needed
     285             :             // here is to disable only one bound and respect the other
                                   // A dummy character is prepended and all positions are shifted by
                                   // one; the shift is undone when the offsets are mapped back below.
     286           2 :             in_str = "X" + in_str;
     287           2 :             nExtraOffset = 1;
     288           2 :             newStartPos += nExtraOffset;
     289           2 :             newEndPos += nExtraOffset;
     290             :         }
     291             : 
     292         358 :         sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
     293             : 
     294             :         // Map offsets back to untransliterated string.
     295         358 :         const sal_Int32 nOffsets = offset.getLength();
     296         358 :         if (nOffsets)
     297             :         {
     298             :             // For regex nGroups is the number of groups+1 with group 0 being
     299             :             // the entire match.
     300         356 :             const sal_Int32 nGroups = sres.startOffset.getLength();
     301         436 :             for ( sal_Int32 k = 0; k < nGroups; k++ )
     302             :             {
     303          80 :                 const sal_Int32 nStart = sres.startOffset[k] - nExtraOffset;
                                       // An index at or past the end of offset maps to one past the
                                       // last recorded original position.
     304          80 :                 if (startPos > 0 || nStart > 0)
     305          16 :                     sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
     306             :                 // JP 20.6.2001: end is ever exclusive and then don't return
     307             :                 //               the position of the next character - return the
     308             :                 //               next position behind the last found character!
     309             :                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
     310          80 :                 const sal_Int32 nStop = sres.endOffset[k] - nExtraOffset;
     311          80 :                 if (startPos > 0 || nStop > 0)
     312          80 :                     sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
     313             :             }
     314         358 :         }
     315             :     }
     316             :     else
     317             :     {
                               // No simple transliteration: search the original string directly.
     318         751 :         sres = (this->*fnForward)( in_str, startPos, endPos );
     319             :     }
     320             : 
     321        1109 :     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
     322             :     {
     323           0 :         SearchResult sres2;
     324             : 
     325           0 :         in_str = OUString(searchStr);
     326           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     327             : 
                               // Unlike the simple pass above, the whole string is transliterated.
     328           0 :         in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
     329             : 
                               // startPos/endPos are by-value parameters; from here on they hold
                               // positions in the transliterated string.
     330           0 :         if( startPos )
     331           0 :             startPos = FindPosInSeq_Impl( offset, startPos );
     332             : 
     333           0 :         if( endPos < searchStr.getLength() )
     334           0 :             endPos = FindPosInSeq_Impl( offset, endPos );
     335             :         else
     336           0 :             endPos = in_str.getLength();
     337             : 
     338           0 :     bUsePrimarySrchStr = false;
     339           0 :         sres2 = (this->*fnForward)( in_str, startPos, endPos );
     340             : 
                               // Map non-zero offsets of the secondary result back to searchStr.
     341           0 :         for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
     342             :         {
     343           0 :             if (sres2.startOffset[k])
     344           0 :           sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
     345           0 :             if (sres2.endOffset[k])
     346           0 :           sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
     347             :         }
     348             : 
     349             :     // pick first and long one
                           // subRegExpressions == 0 on the primary result is treated here as "nothing
                           // found"; otherwise the secondary result wins only if it starts earlier,
                           // or starts at the same position and ends later.
     350           0 :     if ( sres.subRegExpressions == 0)
     351           0 :         return sres2;
     352           0 :     if ( sres2.subRegExpressions == 1)
     353             :     {
     354           0 :         if ( sres.startOffset[0] > sres2.startOffset[0])
     355           0 :             return sres2;
     356           0 :         else if ( sres.startOffset[0] == sres2.startOffset[0] &&
     357           0 :             sres.endOffset[0] < sres2.endOffset[0])
     358           0 :             return sres2;
     359           0 :     }
     360             :     }
     361             : 
     362        2218 :     return sres;
     363             : }
     364             : 
                       // Searches searchStr backward with the routine selected by setOptions()
                       // (fnBackward). For this direction startPos is the upper bound and endPos the
                       // lower one: the range handed to the transliterator begins at endPos and has
                       // length startPos - endPos.
                       //
                       // The structure mirrors searchForward(): an optional simple-transliteration
                       // pass with offsets mapped back to searchStr, followed -- when a complex
                       // transliterator is loaded and the algorithm is not REGEXP -- by a second
                       // search on the complex-transliterated whole string. Of the two results the
                       // later-starting one is returned; on equal start the secondary result is
                       // returned when its end offset is smaller than the primary one's.
                       //
                       // searchStr  text to search in
                       // startPos   position at which the backward search begins (upper bound)
                       // endPos     position at which the backward search ends (lower bound)
                       // Returns the SearchResult of the selected search routine, with offsets
                       // mapped back as described above.
     365          12 : SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
     366             :         throw(RuntimeException, std::exception)
     367             : {
     368          12 :     SearchResult sres;
     369             : 
     370          24 :     OUString in_str(searchStr);
     371             : 
                           // Presumably tells the search routine to use sSrchStr rather than
                           // sSrchStr2 -- the routines reading this flag are outside this view.
     372          12 :     bUsePrimarySrchStr = true;
     373             : 
     374          12 :     if ( xTranslit.is() )
     375             :     {
     376             :         // apply only simple 1<->1 transliteration here
     377          11 :         com::sun::star::uno::Sequence<sal_Int32> offset(startPos - endPos);
     378          11 :     in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
     379             : 
     380             :         // JP 20.6.2001: also the start and end positions must be corrected!
     381          11 :         sal_Int32 const newStartPos = (startPos < searchStr.getLength())
     382             :             ? FindPosInSeq_Impl( offset, startPos )
     383          11 :             : in_str.getLength();
     384             : 
     385             :         sal_Int32 const newEndPos =
     386          11 :             (endPos == 0) ? 0 : FindPosInSeq_Impl( offset, endPos );
     387             : 
     388             :         // TODO: this would need nExtraOffset handling to avoid $ matching
     389             :         // if (pRegexMatcher && startPos < searchStr.getLength())
     390             :         // but that appears to be impossible with ICU regex
     391             : 
     392          11 :         sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
     393             : 
     394             :         // Map offsets back to untransliterated string.
     395          11 :         const sal_Int32 nOffsets = offset.getLength();
     396          11 :         if (nOffsets)
     397             :         {
     398             :             // For regex nGroups is the number of groups+1 with group 0 being
     399             :             // the entire match.
     400          11 :             const sal_Int32 nGroups = sres.startOffset.getLength();
     401          20 :             for ( sal_Int32 k = 0; k < nGroups; k++ )
     402             :             {
                                       // Here startOffset gets the "one past the previous position"
                                       // mapping and endOffset the direct lookup -- the reverse of
                                       // the assignment in searchForward().
     403           9 :                 const sal_Int32 nStart = sres.startOffset[k];
     404           9 :                 if (endPos > 0 || nStart > 0)
     405           9 :                     sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
     406             :                 // JP 20.6.2001: end is ever exclusive and then don't return
     407             :                 //               the position of the next character - return the
     408             :                 //               next position behind the last found character!
     409             :                 //               "a b c" find "b" must return 2,3 and not 2,4!!!
     410           9 :                 const sal_Int32 nStop = sres.endOffset[k];
     411           9 :                 if (endPos > 0 || nStop > 0)
     412           5 :                     sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
     413             :             }
     414          11 :         }
     415             :     }
     416             :     else
     417             :     {
                               // No simple transliteration: search the original string directly.
     418           1 :         sres = (this->*fnBackward)( in_str, startPos, endPos );
     419             :     }
     420             : 
     421          12 :     if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
     422             :     {
     423           0 :     SearchResult sres2;
     424             : 
     425           0 :     in_str = OUString(searchStr);
     426           0 :         com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
     427             : 
                               // Unlike the simple pass above, the whole string is transliterated.
     428           0 :         in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
     429             : 
                               // startPos/endPos are by-value parameters; from here on they hold
                               // positions in the transliterated string.
     430           0 :         if( startPos < searchStr.getLength() )
     431           0 :             startPos = FindPosInSeq_Impl( offset, startPos );
     432             :         else
     433           0 :             startPos = in_str.getLength();
     434             : 
     435           0 :         if( endPos )
     436           0 :             endPos = FindPosInSeq_Impl( offset, endPos );
     437             : 
     438           0 :     bUsePrimarySrchStr = false;
     439           0 :     sres2 = (this->*fnBackward)( in_str, startPos, endPos );
     440             : 
                               // Map non-zero offsets of the secondary result back to searchStr.
     441           0 :         for( int k = 0; k < sres2.startOffset.getLength(); k++ )
     442             :         {
     443           0 :             if (sres2.startOffset[k])
     444           0 :                 sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
     445           0 :             if (sres2.endOffset[k])
     446           0 :                 sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
     447             :         }
     448             : 
     449             :     // pick last and long one
                           // subRegExpressions == 0 on the primary result is treated here as "nothing
                           // found"; otherwise the secondary result wins only if it starts later, or
                           // starts at the same position with a smaller end offset.
     450           0 :     if ( sres.subRegExpressions == 0 )
     451           0 :         return sres2;
     452           0 :     if ( sres2.subRegExpressions == 1 )
     453             :     {
     454           0 :         if ( sres.startOffset[0] < sres2.startOffset[0] )
     455           0 :             return sres2;
     456           0 :         if ( sres.startOffset[0] == sres2.startOffset[0] &&
     457           0 :         sres.endOffset[0] > sres2.endOffset[0] )
     458           0 :             return sres2;
     459           0 :     }
     460             :     }
     461             : 
     462          24 :     return sres;
     463             : }
     464             : 
     465             : 
     466             : 
                       // Returns false if the character at rStr[nPos] is classified as DIGIT, ALPHA
                       // or LETTER for aSrchPara.Locale, and true otherwise. The character 0x7f is
                       // always reported as a delimiter without consulting the classifier.
                       // xCharClass is created on first use; it is assigned inside this const
                       // method, so it is presumably declared mutable -- the declaration is outside
                       // this view.
     467           0 : bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
     468             : {
     469           0 :     bool bRet = true;
     470           0 :     if( '\x7f' != rStr[nPos])
     471             :     {
     472           0 :         if ( !xCharClass.is() )
     473           0 :              xCharClass = CharacterClassification::create( m_xContext );
     474           0 :         sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
     475           0 :                 aSrchPara.Locale );
     476           0 :         if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
     477           0 :                         KCharacterType::LETTER ) & nCType ) )
     478           0 :             bRet = false;
     479             :     }
     480           0 :     return bRet;
     481             : }
     482             : 
     483             : // --------- helper methods for Boyer-Moore like text searching ----------
     484             : // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
     485             : 
                       // Ensures pJumpTable holds the forward-search jump table for sSrchStr.
                       // An existing table is kept when bIsForwardTab says it is a forward table;
                       // otherwise it is deleted and rebuilt. The table maps each character of
                       // sSrchStr except the last one to its distance from the end of the string
                       // (nLen - n - 1); when a character occurs more than once, the entry of the
                       // later occurrence (the smaller distance) replaces the earlier one.
                       //
                       // NOTE(review): bIsForwardTab is also written by MakeForwardTab2() and
                       // MakeBackwardTab2(), which manage pJumpTable2, so the flag may describe the
                       // other table's direction when it is tested here -- confirm.
     486         130 : void TextSearch::MakeForwardTab()
     487             : {
     488             :     // create the jumptable for the search text
     489         130 :     if( pJumpTable )
     490             :     {
     491          85 :         if( bIsForwardTab )
     492         215 :             return ;                                        // the jumpTable is ok
     493           0 :         delete pJumpTable;
     494             :     }
     495          45 :     bIsForwardTab = true;
     496             : 
     497          45 :     sal_Int32 n, nLen = sSrchStr.getLength();
     498          45 :     pJumpTable = new TextSearchJumpTable;
     499             : 
     500         299 :     for( n = 0; n < nLen - 1; ++n )
     501             :     {
     502         254 :         sal_Unicode cCh = sSrchStr[n];
     503         254 :         sal_Int32 nDiff = nLen - n - 1;
     504         254 :     TextSearchJumpTable::value_type aEntry( cCh, nDiff );
     505             : 
     506             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     507         254 :             pJumpTable->insert( aEntry );
                               // insert() does not replace an existing key, so overwrite explicitly.
     508         254 :         if ( !aPair.second )
     509          67 :             (*(aPair.first)).second = nDiff;
     510             :     }
     511             : }
     512             : 
                       // Ensures pJumpTable2 holds the forward-search jump table for the secondary
                       // search string sSrchStr2. Same construction as MakeForwardTab(): each
                       // character except the last maps to its distance from the end of the string,
                       // with later occurrences replacing earlier ones.
                       //
                       // NOTE(review): the direction flag tested and set here is the same
                       // bIsForwardTab that MakeForwardTab()/MakeBackwardTab() use for pJumpTable,
                       // so it may not describe pJumpTable2 -- confirm.
     513           0 : void TextSearch::MakeForwardTab2()
     514             : {
     515             :     // create the jumptable for the search text
     516           0 :     if( pJumpTable2 )
     517             :     {
     518           0 :         if( bIsForwardTab )
     519           0 :             return ;                                        // the jumpTable is ok
     520           0 :         delete pJumpTable2;
     521             :     }
     522           0 :     bIsForwardTab = true;
     523             : 
     524           0 :     sal_Int32 n, nLen = sSrchStr2.getLength();
     525           0 :     pJumpTable2 = new TextSearchJumpTable;
     526             : 
     527           0 :     for( n = 0; n < nLen - 1; ++n )
     528             :     {
     529           0 :         sal_Unicode cCh = sSrchStr2[n];
     530           0 :         sal_Int32 nDiff = nLen - n - 1;
     531             : 
     532           0 :     TextSearchJumpTable::value_type aEntry( cCh, nDiff );
     533             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     534           0 :             pJumpTable2->insert( aEntry );
                               // insert() does not replace an existing key, so overwrite explicitly.
     535           0 :         if ( !aPair.second )
     536           0 :             (*(aPair.first)).second = nDiff;
     537             :     }
     538             : }
     539             : 
                       // Ensures pJumpTable holds the backward-search jump table for sSrchStr.
                       // An existing table is kept when bIsForwardTab says it is not a forward
                       // table; otherwise it is deleted and rebuilt. The table maps each character
                       // of sSrchStr except the first one to its index n; the loop runs from the
                       // last index down to 1, so for a repeated character the entry ends up holding
                       // the smallest such index.
                       //
                       // NOTE(review): bIsForwardTab is also written by MakeForwardTab2() and
                       // MakeBackwardTab2(), which manage pJumpTable2, so the flag may describe the
                       // other table's direction when it is tested here -- confirm.
     540           7 : void TextSearch::MakeBackwardTab()
     541             : {
     542             :     // create the jumptable for the search text
     543           7 :     if( pJumpTable )
     544             :     {
     545           3 :         if( !bIsForwardTab )
     546           9 :             return ;                                        // the jumpTable is ok
     547           1 :         delete pJumpTable;
     548             :     }
     549           5 :     bIsForwardTab = false;
     550             : 
     551           5 :     sal_Int32 n, nLen = sSrchStr.getLength();
     552           5 :     pJumpTable = new TextSearchJumpTable;
     553             : 
     554          25 :     for( n = nLen-1; n > 0; --n )
     555             :     {
     556          20 :         sal_Unicode cCh = sSrchStr[n];
     557          20 :         TextSearchJumpTable::value_type aEntry( cCh, n );
     558             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     559          20 :             pJumpTable->insert( aEntry );
                               // insert() does not replace an existing key, so overwrite explicitly.
     560          20 :         if ( !aPair.second )
     561           0 :             (*(aPair.first)).second = n;
     562             :     }
     563             : }
     564             : 
     565           0 : void TextSearch::MakeBackwardTab2() // Builds pJumpTable2, the backward-search jump table for sSrchStr2.
     566             : {
     567             :     // Keep an existing table only if bIsForwardTab marks it as a backward table; otherwise rebuild it.
     568           0 :     if( pJumpTable2 )
     569             :     {
     570           0 :         if( !bIsForwardTab ) // NOTE(review): MakeBackwardTab() sets this same flag for pJumpTable - confirm it also tracks pJumpTable2
     571           0 :             return ;                                        // the jumpTable is ok
     572           0 :         delete pJumpTable2;
     573             :     }
     574           0 :     bIsForwardTab = false;
     575             : 
     576           0 :     sal_Int32 n, nLen = sSrchStr2.getLength();
     577           0 :     pJumpTable2 = new TextSearchJumpTable;
     578             :     // Walk from the last character down to index 1 (index 0 gets no entry); each entry is the character's index.
     579           0 :     for( n = nLen-1; n > 0; --n )
     580             :     {
     581           0 :         sal_Unicode cCh = sSrchStr2[n];
     582           0 :         TextSearchJumpTable::value_type aEntry( cCh, n );
     583             :         ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
     584           0 :             pJumpTable2->insert( aEntry );
     585           0 :         if ( !aPair.second )
     586           0 :             (*(aPair.first)).second = n; // repeated character: the smaller index replaces the stored one
     587             :     }
     588             : }
     589             : 
     590         326 : sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const // Returns the jump distance stored for cChr in the active jump table.
     591             : {
     592             :     TextSearchJumpTable *pJump; // table selected by bUsePrimarySrchStr; dereferenced below without a null check
     593         326 :     OUString sSearchKey;
     594             :     // Pick the table and key that belong to the search string currently in use.
     595         326 :     if ( bUsePrimarySrchStr ) {
     596         326 :       pJump = pJumpTable;
     597         326 :       sSearchKey = sSrchStr;
     598             :     } else {
     599           0 :       pJump = pJumpTable2;
     600           0 :       sSearchKey = sSrchStr2;
     601             :     }
     602             :     // Characters that have no table entry yield the full key length.
     603         326 :     TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
     604         326 :     if ( iLook == pJump->end() )
     605         262 :         return sSearchKey.getLength();
     606          64 :     return (*iLook).second;
     607             : }
     608             : 
     609             : 
     610         319 : SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) // Forward plain-text search for the active key in searchStr.
     611             :         throw(RuntimeException)
     612             : { // Result: subRegExpressions stays 0 when nothing is found, else 1 with the match at [startOffset[0], endOffset[0]).
     613         319 :     SearchResult aRet;
     614         319 :     aRet.subRegExpressions = 0;
     615             :     // Use the primary or the secondary search string as the key.
     616         638 :     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
     617             : 
     618         638 :     OUString aStr( searchStr );
     619         319 :     sal_Int32 nSuchIdx = aStr.getLength();
     620         319 :     sal_Int32 nEnde = endPos;
     621         319 :     if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx ) // empty text, empty key or key longer than text
     622         189 :         return aRet;
     623             : 
     624             : 
     625         130 :     if( nEnde < sSearchKey.getLength() )  // position inside the search region ?
     626           0 :         return aRet;
     627             :     // nEnde becomes the last index at which a match may start.
     628         130 :     nEnde -= sSearchKey.getLength();
     629             : 
     630         130 :     if (bUsePrimarySrchStr)
     631         130 :       MakeForwardTab();                   // create the jumptable
     632             :     else
     633           0 :       MakeForwardTab2();
     634             :     // After each candidate, advance by the jump distance of the text character aligned with the key's last character.
     635         427 :     for (sal_Int32 nCmpIdx = startPos; // start position for the search
     636             :             nCmpIdx <= nEnde;
     637         297 :             nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
     638             :     {
     639             :         // With the CTL checks enabled, skip candidates whose start or end is not reported as a cell start.
     640         357 :         if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
     641           0 :                     && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
     642           0 :             continue;
     643             :         // Compare the key with the text from its last character towards its first.
     644         357 :         nSuchIdx = sSearchKey.getLength() - 1;
     645        1170 :         while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
     646             :         {
     647         516 :             if( nSuchIdx == 0 ) // every character of the key matched
     648             :             {
     649          60 :                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
     650             :                 {
     651           0 :                     sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
     652           0 :                     bool bAtStart = !nCmpIdx;
     653           0 :                     bool bAtEnd = nFndEnd == endPos;
     654           0 :                     bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
     655           0 :                     bool bDelimBehind = bAtEnd || IsDelimiter(  aStr, nFndEnd );
     656             :                     //  *       1 -> only one word in the paragraph
     657             :                     //  *       2 -> at begin of paragraph
     658             :                     //  *       3 -> at end of paragraph
     659             :                     //  *       4 -> inside the paragraph
     660           0 :                     if( !(  ( bAtStart && bAtEnd ) ||           // 1
     661           0 :                                 ( bAtStart && bDelimBehind ) ||     // 2
     662           0 :                                 ( bAtEnd && bDelimBefore ) ||       // 3
     663           0 :                                 ( bDelimBefore && bDelimBehind )))  // 4
     664             :                         break; // not a whole word: leave the comparison and try the next position
     665             :                 }
     666             : 
     667          60 :                 aRet.subRegExpressions = 1;
     668          60 :                 aRet.startOffset.realloc( 1 );
     669          60 :                 aRet.startOffset[ 0 ] = nCmpIdx;
     670          60 :                 aRet.endOffset.realloc( 1 );
     671          60 :                 aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
     672             : 
     673          60 :                 return aRet;
     674             :             }
     675             :             else
     676         456 :                 nSuchIdx--;
     677             :         }
     678             :     }
     679          70 :     return aRet;
     680             : }
     681             : 
     682           7 : SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos ) // Backward plain-text search for the active key in searchStr.
     683             :         throw(RuntimeException)
     684             : { // The search begins at startPos and moves towards lower indices; a match is reported with startOffset[0] > endOffset[0].
     685           7 :     SearchResult aRet;
     686           7 :     aRet.subRegExpressions = 0;
     687             :     // Use the primary or the secondary search string as the key.
     688          14 :     OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
     689             : 
     690          14 :     OUString aStr( searchStr );
     691           7 :     sal_Int32 nSuchIdx = aStr.getLength();
     692           7 :     sal_Int32 nEnde = endPos;
     693           7 :     if( nSuchIdx == 0 || sSearchKey.isEmpty() || sSearchKey.getLength() > nSuchIdx) // empty text, empty key or key longer than text
     694           0 :         return aRet;
     695             : 
     696           7 :     if (bUsePrimarySrchStr)
     697           7 :       MakeBackwardTab();                      // create the jumptable
     698             :     else
     699           0 :       MakeBackwardTab2();
     700             :     // Lower limit for nCmpIdx: the key length if endPos equals the text length, otherwise endPos plus the key length.
     701           7 :     if( nEnde == nSuchIdx )                 // end position for the search
     702           0 :         nEnde = sSearchKey.getLength();
     703             :     else
     704           7 :         nEnde += sSearchKey.getLength();
     705             :     // nCmpIdx is the index just behind the candidate, which covers [nCmpIdx - key length, nCmpIdx).
     706           7 :     sal_Int32 nCmpIdx = startPos;          // start position for the search
     707             : 
     708          43 :     while (nCmpIdx >= nEnde)
     709             :     {
     710             :         // With the CTL checks enabled, only compare candidates whose start and end are reported as cell starts.
     711          68 :         if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
     712          68 :                         sSearchKey.getLength() )) && (!checkCTLEnd ||
     713           0 :                     isCellStart( aStr, nCmpIdx)))
     714             :         {
     715          34 :             nSuchIdx = 0; // compare the key with the text from its first character onwards
     716         149 :             while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
     717          55 :                     aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
     718          26 :                 nSuchIdx++;
     719          34 :             if( nSuchIdx >= sSearchKey.getLength() ) // every character of the key matched
     720             :             {
     721           5 :                 if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
     722             :                 {
     723           0 :                     sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
     724           0 :                     bool bAtStart = !nFndStt;
     725           0 :                     bool bAtEnd = nCmpIdx == startPos;
     726           0 :                     bool bDelimBehind = bAtEnd || IsDelimiter( aStr, nCmpIdx );
     727           0 :                     bool bDelimBefore = bAtStart || // begin of paragraph
     728           0 :                         IsDelimiter( aStr, nFndStt-1 );
     729             :                     //  *       1 -> only one word in the paragraph
     730             :                     //  *       2 -> at begin of paragraph
     731             :                     //  *       3 -> at end of paragraph
     732             :                     //  *       4 -> inside the paragraph
     733           0 :                     if( ( bAtStart && bAtEnd ) ||           // 1
     734           0 :                             ( bAtStart && bDelimBehind ) ||     // 2
     735           0 :                             ( bAtEnd && bDelimBefore ) ||       // 3
     736           0 :                             ( bDelimBefore && bDelimBehind ))   // 4
     737             :                     {
     738           0 :                         aRet.subRegExpressions = 1;
     739           0 :                         aRet.startOffset.realloc( 1 );
     740           0 :                         aRet.startOffset[ 0 ] = nCmpIdx;
     741           0 :                         aRet.endOffset.realloc( 1 );
     742           0 :                         aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
     743           0 :                         return aRet;
     744             :                     }
     745             :                 }
     746             :                 else
     747             :                 {
     748           5 :                     aRet.subRegExpressions = 1;
     749           5 :                     aRet.startOffset.realloc( 1 );
     750           5 :                     aRet.startOffset[ 0 ] = nCmpIdx;
     751           5 :                     aRet.endOffset.realloc( 1 );
     752           5 :                     aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
     753           5 :                     return aRet;
     754             :                 }
     755             :             }
     756             :         }
     757          29 :         nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] ); // jump distance of the candidate's first text character
     758          29 :         if( nCmpIdx < nSuchIdx ) // the jump would move before the beginning of the text
     759           0 :             return aRet;
     760          29 :         nCmpIdx -= nSuchIdx;
     761             :     }
     762           2 :     return aRet;
     763             : }
     765          19 : void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions) // Compiles the search pattern into the ICU matcher pRegexMatcher.
     766             : {
     767             :     // select the transliterated pattern string
     768             :     const OUString& rPatternStr =
     769          19 :         (isSimpleTrans( rOptions.transliterateFlags) ? sSrchStr
     770          19 :         : (isComplexTrans( rOptions.transliterateFlags) ? sSrchStr2 : rOptions.searchString));
     771             : 
     772          19 :     sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
     773             :     // map com::sun::star::util::SearchFlags to ICU uregex.h flags
     774             :     // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
     775             :     // REG_NEWLINE is neither properly defined nor used anywhere => not implemented
     776             :     // REG_NOSUB is not used anywhere => not implemented
     777             :     // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
     778             :     // LEV_RELAXED is only used for SearchAlgorithm==Approximate
     779             :     // Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
     780             :     // probably because the transliteration flag IGNORE_CASE handles it as well.
     781          19 :     if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0
     782          13 :     ||  (rOptions.transliterateFlags & TransliterationModules_IGNORE_CASE) != 0)
     783          10 :         nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
     784          19 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     785             :     // assumption: transliteration didn't mangle regexp control chars
     786          19 :     IcuUniString aIcuSearchPatStr( reinterpret_cast<const UChar*>(rPatternStr.getStr()), rPatternStr.getLength());
     787             : #ifndef DISABLE_WORDBOUND_EMULATION
     788             :     // for convenience, specific syntax elements of the old regex engine are emulated
     789             :     // - by replacing \< with "word-break followed by a look-ahead word-char"
     790          19 :     static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
     791          19 :     static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
     792          19 :     static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr); // function-local static: constructed on the first call only
     793          19 :     aChevronMatcherB.reset( aIcuSearchPatStr);
     794          19 :     aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
     795          19 :     aChevronMatcherB.reset();
     796             :     // - by replacing \> with "look-behind word-char followed by a word-break"
     797          19 :     static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
     798          19 :     static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
     799          19 :     static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr); // function-local static: constructed on the first call only
     800          19 :     aChevronMatcherE.reset( aIcuSearchPatStr);
     801          19 :     aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
     802          19 :     aChevronMatcherE.reset();
     803             : #endif
     804          19 :     pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr); // NOTE(review): a previous pRegexMatcher is not deleted here - presumably the caller does; confirm
     805          19 :     if (nIcuErr) // NOTE(review): any non-zero code discards the matcher, presumably including ICU warning codes - confirm against U_FAILURE()
     806             :     {
     807             :         SAL_INFO( "i18npool", "TextSearch::RESrchPrepare UErrorCode " << nIcuErr);
     808           0 :         delete pRegexMatcher;
     809           0 :         pRegexMatcher = NULL;
     810             :     }
     811             :     else
     812             :     {
     813             :         // Pathological patterns may result in exponential run time making the
     814             :         // application appear to be frozen. Limit that. Documentation for this
     815             :         // call says
     816             :         // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
     817             :         // "The units of the limit are steps of the match engine.
     818             :         // Correspondence with actual processor time will depend on the speed
     819             :         // of the processor and the details of the specific pattern, but will
     820             :         // typically be on the order of milliseconds."
     821             :         // Just what is a good value? 42 is always an answer ... the 23 enigma
     822             :         // as well.. which on the dev's machine is roughly 50 seconds with the
     823             :         // pattern of fdo#70627.
     824             :         /* TODO: make this a configuration settable value and possibly take
     825             :          * complexity of expression into account and maybe even length of text
     826             :          * to be matched; currently (2013-11-25) that is at most one 64k
     827             :          * paragraph per RESrchFrwrd()/RESrchBkwrd() call. */
     828          19 :         pRegexMatcher->setTimeLimit( 23*1000, nIcuErr);
     829          19 :     }
     830          19 : }
     831             : 
     832             : 
     833             : 
     834         801 : static bool lcl_findRegex( RegexMatcher * pRegexMatcher, sal_Int32 nStartPos, UErrorCode & rIcuErr ) // Runs pRegexMatcher->find() from nStartPos and returns its result.
     835             : { // When find() returns false the value of rIcuErr is logged; rIcuErr is handed to find() and not reset here.
     836         801 :     if (!pRegexMatcher->find( nStartPos, rIcuErr))
     837             :     {
     838             :         /* TODO: future versions could pass the UErrorCode or translations
     839             :          * thereof to the caller, for example to inform the user of
     840             :          * U_REGEX_TIME_OUT. The strange thing though is that an error is set
     841             :          * only after the second call that returns immediately and not if
     842             :          * timeout occurred on the first call?!? */
     843             :         SAL_INFO( "i18npool", "lcl_findRegex UErrorCode " << rIcuErr);
     844         138 :         return false;
     845             :     }
     846         663 :     return true;
     847             : }
     848             : 
     849         790 : SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr, // Forward regex search in the first endPos code units of searchStr, starting at startPos.
     850             :                                       sal_Int32 startPos, sal_Int32 endPos )
     851             :             throw(RuntimeException)
     852             : { // Result: entry 0 is the whole match, entries 1..groupCount() are the capture groups; subRegExpressions stays 0 if nothing matched.
     853         790 :     SearchResult aRet;
     854         790 :     aRet.subRegExpressions = 0;
     855         790 :     if( !pRegexMatcher) // no matcher available, report "not found"
     856           0 :         return aRet;
     857             : 
     858         790 :     if( endPos > searchStr.getLength()) // clamp endPos to the text length
     859           0 :         endPos = searchStr.getLength();
     860             : 
     861             :     // use the ICU RegexMatcher to find the matches
     862         790 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     863        1580 :     const IcuUniString aSearchTargetStr( reinterpret_cast<const UChar*>(searchStr.getStr()), endPos);
     864         790 :     pRegexMatcher->reset( aSearchTargetStr);
     865             :     // search until there is a valid match
     866             :     for(;;)
     867             :     {
     868         790 :         if (!lcl_findRegex( pRegexMatcher, startPos, nIcuErr))
     869         137 :             return aRet;
     870             : 
     871             :         // #i118887# ignore zero-length matches e.g. "a*" in "bc"
     872         653 :         int nStartOfs = pRegexMatcher->start( nIcuErr);
     873         653 :         int nEndOfs = pRegexMatcher->end( nIcuErr);
     874         653 :         if( nStartOfs < nEndOfs)
     875         652 :             break;
     876             :         // If the zero-length match is behind the string, do not match it again
     877             :         // and again until startPos reaches there. A match behind the string is
     878             :         // a "$" anchor.
     879           1 :         if (nStartOfs == endPos)
     880           1 :             break;
     881             :         // try at next position if there was a zero-length match
     882           0 :         if( ++startPos >= endPos)
     883           0 :             return aRet;
     884           0 :     }
     885             : 
     886             :     // extract the result of the search
     887         653 :     const int nGroupCount = pRegexMatcher->groupCount();
     888         653 :     aRet.subRegExpressions = nGroupCount + 1;
     889         653 :     aRet.startOffset.realloc( aRet.subRegExpressions);
     890         653 :     aRet.endOffset.realloc( aRet.subRegExpressions);
     891         653 :     aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
     892         653 :     aRet.endOffset[0]   = pRegexMatcher->end( nIcuErr);
     893         655 :     for( int i = 1; i <= nGroupCount; ++i) {
     894           2 :         aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
     895           2 :         aRet.endOffset[i]   = pRegexMatcher->end( i, nIcuErr);
     896             :     }
     897             : 
     898         653 :     return aRet;
     899             : }
     900             : 
     901           5 : SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr, // Backward regex search, emulated with repeated forward find() calls.
     902             :                                       sal_Int32 startPos, sal_Int32 endPos )
     903             :             throw(RuntimeException)
     904             : { // The text is limited to the first startPos code units and find() starts at endPos; offsets are returned inverted.
     905             :     // NOTE: for backwards search callers provide startPos/endPos inverted!
     906           5 :     SearchResult aRet;
     907           5 :     aRet.subRegExpressions = 0;
     908           5 :     if( !pRegexMatcher) // no matcher available, report "not found"
     909           0 :         return aRet;
     910             : 
     911           5 :     if( startPos > searchStr.getLength()) // clamp startPos to the text length
     912           0 :         startPos = searchStr.getLength();
     913             : 
     914             :     // use the ICU RegexMatcher to find the matches
     915             :     // TODO: use ICU's backward searching once it becomes available
     916             :     //       as its replacement using forward search is not as good as the real thing
     917           5 :     UErrorCode nIcuErr = U_ZERO_ERROR;
     918          10 :     const IcuUniString aSearchTargetStr( reinterpret_cast<const UChar*>(searchStr.getStr()), startPos);
     919           5 :     pRegexMatcher->reset( aSearchTargetStr);
     920           5 :     if (!lcl_findRegex( pRegexMatcher, endPos, nIcuErr))
     921           0 :         return aRet;
     922             : 
     923             :     // find the last match
     924           5 :     int nLastPos = 0;
     925           5 :     int nFoundEnd = 0;
     926           5 :     int nGoodPos = 0, nGoodEnd = 0;
     927           5 :     bool bFirst = true;
     928           1 :     do {
     929           5 :         nLastPos = pRegexMatcher->start( nIcuErr);
     930           5 :         nFoundEnd = pRegexMatcher->end( nIcuErr);
     931           5 :         if (nLastPos < nFoundEnd)
     932             :         {
     933             :             // remember last non-zero-length match
     934           5 :             nGoodPos = nLastPos;
     935           5 :             nGoodEnd = nFoundEnd;
     936             :         }
     937           5 :         if( nFoundEnd >= startPos) // this match ends at the end of the searched text
     938           4 :             break;
     939           1 :         bFirst = false;
     940           1 :         if( nFoundEnd == nLastPos) // zero-length match: start the next find() one position further
     941           0 :             ++nFoundEnd;
     942           1 :     } while( lcl_findRegex( pRegexMatcher, nFoundEnd, nIcuErr));
     943             : 
     944             :     // Ignore all zero-length matches except "$" anchor on first match.
     945           5 :     if (nGoodPos == nGoodEnd) // both are still 0, i.e. no non-zero-length match was recorded
     946             :     {
     947           0 :         if (bFirst && nLastPos == startPos)
     948           0 :             nGoodPos = nLastPos;
     949             :         else
     950           0 :             return aRet;
     951             :     }
     952             : 
     953             :     // find last match again to get its details
     954           5 :     lcl_findRegex( pRegexMatcher, nGoodPos, nIcuErr);
     955             : 
     956             :     // fill in the details of the last match
     957           5 :     const int nGroupCount = pRegexMatcher->groupCount();
     958           5 :     aRet.subRegExpressions = nGroupCount + 1;
     959           5 :     aRet.startOffset.realloc( aRet.subRegExpressions);
     960           5 :     aRet.endOffset.realloc( aRet.subRegExpressions);
     961             :     // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
     962           5 :     aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
     963           5 :     aRet.endOffset[0]   = pRegexMatcher->start( nIcuErr);
     964           7 :     for( int i = 1; i <= nGroupCount; ++i) {
     965           2 :         aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
     966           2 :         aRet.endOffset[i]   = pRegexMatcher->start( i, nIcuErr);
     967             :     }
     968             : 
     969           5 :     return aRet;
     970             : }
     971             : 
     972             : 
     973             : 
     974             : // search for words phonetically
     975           0 : SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr, // Forward approximate search: tests searchStr word by word with pWLD->WLD().
     976             :                                           sal_Int32 startPos, sal_Int32 endPos )
     977             :             throw(RuntimeException)
     978             : { // Returns the first word (clipped to [startPos, endPos)) whose WLD() value is <= nLimit; subRegExpressions stays 0 otherwise.
     979           0 :     SearchResult aRet;
     980           0 :     aRet.subRegExpressions = 0;
     981             : 
     982           0 :     if( !xBreak.is() ) // word boundaries cannot be determined without xBreak
     983           0 :         return aRet;
     984             : 
     985           0 :     OUString aWTemp( searchStr );
     986             : 
     987             :     sal_Int32 nStt, nEnd;
     988             :     // Start with the word boundary reported for startPos.
     989           0 :     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
     990             :             aSrchPara.Locale,
     991           0 :             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
     992             : 
     993           0 :     do
     994             :     {
     995           0 :         if( aWBnd.startPos >= endPos ) // the word begins at or behind the end of the search range
     996           0 :             break;
     997           0 :         nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
     998           0 :         nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
     999             : 
    1000           0 :         if( nStt < nEnd &&
    1001           0 :                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
    1002             :         {
    1003           0 :             aRet.subRegExpressions = 1;
    1004           0 :             aRet.startOffset.realloc( 1 );
    1005           0 :             aRet.startOffset[ 0 ] = nStt;
    1006           0 :             aRet.endOffset.realloc( 1 );
    1007           0 :             aRet.endOffset[ 0 ] = nEnd;
    1008           0 :             break;
    1009             :         }
    1010             :         // Ask for the next word, starting from the last character of the current one.
    1011           0 :         nStt = nEnd - 1;
    1012           0 :         aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
    1013           0 :                 WordType::ANYWORD_IGNOREWHITESPACES);
    1014           0 :     } while( aWBnd.startPos != aWBnd.endPos ||
    1015           0 :             (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
    1016             :     // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
    1017             :     // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
    1018             :     // and nextWord() does also => don't loop forever.
    1019           0 :     return aRet;
    1020             : }
    1021             : 
    1022           0 : SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr, // Backward approximate search: tests searchStr word by word with pWLD->WLD().
    1023             :                                           sal_Int32 startPos, sal_Int32 endPos )
    1024             :             throw(RuntimeException)
    1025             : { // Words are clipped to [endPos, startPos); a hit is returned with startOffset[0] = word end and endOffset[0] = word start.
    1026           0 :     SearchResult aRet;
    1027           0 :     aRet.subRegExpressions = 0;
    1028             : 
    1029           0 :     if( !xBreak.is() ) // word boundaries cannot be determined without xBreak
    1030           0 :         return aRet;
    1031             : 
    1032           0 :     OUString aWTemp( searchStr );
    1033             : 
    1034             :     sal_Int32 nStt, nEnd;
    1035             :     // Start with the word boundary reported for startPos.
    1036           0 :     Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
    1037             :             aSrchPara.Locale,
    1038           0 :             WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
    1039             : 
    1040           0 :     do
    1041             :     {
    1042           0 :         if( aWBnd.endPos <= endPos ) // the word ends at or before the lower end of the search range
    1043           0 :             break;
    1044           0 :         nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
    1045           0 :         nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
    1046             : 
    1047           0 :         if( nStt < nEnd &&
    1048           0 :                 pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
    1049             :         {
    1050           0 :             aRet.subRegExpressions = 1;
    1051           0 :             aRet.startOffset.realloc( 1 );
    1052           0 :             aRet.startOffset[ 0 ] = nEnd;
    1053           0 :             aRet.endOffset.realloc( 1 );
    1054           0 :             aRet.endOffset[ 0 ] = nStt;
    1055           0 :             break;
    1056             :         }
    1057           0 :         if( !nStt ) // the clipped word starts at index 0, nothing precedes it
    1058           0 :             break;
    1059             :         // Ask for the word preceding the current one.
    1060           0 :         aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
    1061           0 :                 WordType::ANYWORD_IGNOREWHITESPACES);
    1062           0 :     } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
    1063           0 :     return aRet;
    1064             : }
    1065             : 
    1066             : 
    1067             : static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n"; // implementation name returned by getImplementationName_Static()
    1068             : 
    1069          15 : static OUString getServiceName_Static() // Returns the service name that getSupportedServiceNames() reports.
    1070             : {
    1071          15 :     return OUString( "com.sun.star.util.TextSearch" );
    1072             : }
    1073             : 
    1074          15 : static OUString getImplementationName_Static() // Returns the implementation name as an OUString.
    1075             : {
    1076          15 :     return OUString( cSearchImpl ); // built from the file-level cSearchImpl literal
    1077             : }
    1078             : 
    1079             : OUString SAL_CALL // Reports the implementation name of this TextSearch component.
    1080           1 : TextSearch::getImplementationName()
    1081             :                 throw( RuntimeException, std::exception )
    1082             : {
    1083           1 :     return getImplementationName_Static(); // same helper the component factory passes to createSingleFactory
    1084             : }
    1085             : 
    1086           0 : sal_Bool SAL_CALL TextSearch::supportsService(const OUString& rServiceName) // Answers whether rServiceName is a service this object supports.
    1087             :                 throw( RuntimeException, std::exception )
    1088             : {
    1089           0 :     return cppu::supportsService(this, rServiceName); // delegates to the cppu helper; NOTE(review): presumably it checks rServiceName against getSupportedServiceNames() - confirm
    1090             : }
    1091             : 
    1092             : Sequence< OUString > SAL_CALL // Returns the list of service names supported by this implementation.
    1093           1 : TextSearch::getSupportedServiceNames() throw( RuntimeException, std::exception )
    1094             : {
    1095           1 :     Sequence< OUString > aRet(1); // exactly one entry: the name from getServiceName_Static()
    1096           1 :     aRet[0] = getServiceName_Static();
    1097           1 :     return aRet;
    1098             : }
    1099             : 
    1100             : ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface > // Creation callback handed to createSingleFactory: builds a new TextSearch and returns it as an XInterface reference.
    1101          74 : SAL_CALL TextSearch_CreateInstance(
    1102             :         const ::com::sun::star::uno::Reference<
    1103             :         ::com::sun::star::lang::XMultiServiceFactory >& rxMSF ) // rxMSF: service factory from which the component context is obtained
    1104             : {
    1105             :     return ::com::sun::star::uno::Reference<
    1106             :         ::com::sun::star::uno::XInterface >(
    1107             :                 static_cast<cppu::OWeakObject*>(new TextSearch( // NOTE(review): the cast to OWeakObject presumably selects an unambiguous XInterface base - confirm
    1108          74 :                         comphelper::getComponentContext( rxMSF ) )) ); // the new TextSearch is constructed with the context derived from rxMSF
    1109             : }
    1110             : 
    1111             : extern "C"
    1112             : {
    1113             : SAL_DLLPUBLIC_EXPORT void* SAL_CALL // Exported entry point: returns a single-service factory for the requested implementation name, or NULL if the name is not cSearchImpl.
    1114          14 : i18nsearch_component_getFactory( const sal_Char* sImplementationName, // name of the implementation the caller asks for
    1115             :                                  void* _pServiceManager, // untyped pointer, treated below as an XMultiServiceFactory*
    1116             :                                  SAL_UNUSED_PARAMETER void* ) // third argument is not used
    1117             : {
    1118          14 :     void* pRet = NULL;
    1119             :     // The service manager arrives as void*; cast it to its interface type.
    1120             :     ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
    1121             :         static_cast< ::com::sun::star::lang::XMultiServiceFactory* >
    1122          14 :             ( _pServiceManager );
    1123             :     ::com::sun::star::uno::Reference<
    1124          14 :             ::com::sun::star::lang::XSingleServiceFactory > xFactory;
    1125             :     // Only the TextSearch implementation is served here; any other name leaves xFactory empty.
    1126          14 :     if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
    1127             :     {
    1128          14 :         ::com::sun::star::uno::Sequence< OUString > aServiceNames(1);
    1129          14 :         aServiceNames[0] = getServiceName_Static();
    1130          28 :         xFactory = ::cppu::createSingleFactory(
    1131             :                 pServiceManager, getImplementationName_Static(),
    1132          28 :                 &TextSearch_CreateInstance, aServiceNames ); // TextSearch_CreateInstance is the callback that creates instances
    1133             :     }
    1134             :     // Hand the factory out as a raw pointer; pRet stays NULL when no factory was created.
    1135          14 :     if ( xFactory.is() )
    1136             :     {
    1137          14 :         xFactory->acquire(); // NOTE(review): extra acquire presumably keeps the factory alive once the local xFactory is destroyed, with the caller owning that reference - confirm
    1138          14 :         pRet = xFactory.get();
    1139             :     }
    1140             :     // NULL here means sImplementationName did not match or no factory was produced.
    1141          14 :     return pRet;
    1142             : }
    1143             : 
    1144             : } // extern "C"
    1145             : 
    1146             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11