Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "textsearch.hxx"
21 : #include "levdis.hxx"
22 : #include <com/sun/star/lang/Locale.hpp>
23 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
24 : #include <comphelper/processfactory.hxx>
25 : #include <com/sun/star/i18n/BreakIterator.hpp>
26 : #include <com/sun/star/i18n/UnicodeType.hpp>
27 : #include <com/sun/star/util/SearchFlags.hpp>
28 : #include <com/sun/star/i18n/WordType.hpp>
29 : #include <com/sun/star/i18n/ScriptType.hpp>
30 : #include <com/sun/star/i18n/CharacterIteratorMode.hpp>
31 : #include <com/sun/star/i18n/CharacterClassification.hpp>
32 : #include <com/sun/star/i18n/KCharacterType.hpp>
33 : #include <com/sun/star/i18n/Transliteration.hpp>
34 : #include <com/sun/star/registry/XRegistryKey.hpp>
35 : #include <cppuhelper/factory.hxx>
36 : #include <cppuhelper/supportsservice.hxx>
37 : #include <cppuhelper/weak.hxx>
38 : #include <sal/log.hxx>
39 :
40 : #ifdef _MSC_VER
41 : // get rid of that dumb compiler warning
42 : // identifier was truncated to '255' characters in the debug information
43 : // for STL template usage, if .pdb files are to be created
44 : #pragma warning( disable: 4786 )
45 : #endif
46 :
47 : #include <string.h>
48 :
49 : using namespace ::com::sun::star::util;
50 : using namespace ::com::sun::star::uno;
51 : using namespace ::com::sun::star::lang;
52 : using namespace ::com::sun::star::i18n;
53 : using namespace ::com::sun::star;
54 :
55 : const sal_Int32 COMPLEX_TRANS_MASK =
56 : TransliterationModules_ignoreBaFa_ja_JP |
57 : TransliterationModules_ignoreIterationMark_ja_JP |
58 : TransliterationModules_ignoreTiJi_ja_JP |
59 : TransliterationModules_ignoreHyuByu_ja_JP |
60 : TransliterationModules_ignoreSeZe_ja_JP |
61 : TransliterationModules_ignoreIandEfollowedByYa_ja_JP |
62 : TransliterationModules_ignoreKiKuFollowedBySa_ja_JP |
63 : TransliterationModules_ignoreProlongedSoundMark_ja_JP;
64 :
65 : namespace
66 : {
67 0 : sal_Int32 maskComplexTrans( sal_Int32 n )
68 : {
69 : // IGNORE_KANA and FULLWIDTH_HALFWIDTH are simple but need to take effect
70 : // in complex transliteration.
71 : return
72 0 : (n & COMPLEX_TRANS_MASK) | // all set ignore bits
73 : TransliterationModules_IGNORE_KANA | // plus IGNORE_KANA bit
74 0 : TransliterationModules_FULLWIDTH_HALFWIDTH; // and the FULLWIDTH_HALFWIDTH value
75 : }
76 :
77 156 : bool isComplexTrans( sal_Int32 n )
78 : {
79 156 : return n & COMPLEX_TRANS_MASK;
80 : }
81 :
82 281 : sal_Int32 maskSimpleTrans( sal_Int32 n )
83 : {
84 281 : return n & ~COMPLEX_TRANS_MASK;
85 : }
86 :
87 218 : bool isSimpleTrans( sal_Int32 n )
88 : {
89 218 : return maskSimpleTrans(n) != 0;
90 : }
91 :
92 : // Regex patterns are case sensitive.
93 21 : sal_Int32 maskSimpleRegexTrans( sal_Int32 n )
94 : {
95 21 : sal_Int32 m = (n & TransliterationModules_IGNORE_MASK) & ~TransliterationModules_IGNORE_CASE;
96 21 : sal_Int32 v = n & TransliterationModules_NON_IGNORE_MASK;
97 21 : if (v == TransliterationModules_UPPERCASE_LOWERCASE || v == TransliterationModules_LOWERCASE_UPPERCASE)
98 0 : v = 0;
99 21 : return (m | v) & ~COMPLEX_TRANS_MASK;
100 : }
101 :
102 19 : bool isSimpleRegexTrans( sal_Int32 n )
103 : {
104 19 : return maskSimpleRegexTrans(n) != 0;
105 : }
106 : };
107 :
108 74 : TextSearch::TextSearch(const Reference < XComponentContext > & rxContext)
109 : : m_xContext( rxContext )
110 : , pJumpTable( 0 )
111 : , pJumpTable2( 0 )
112 : , pRegexMatcher( NULL )
113 74 : , pWLD( 0 )
114 : {
115 74 : SearchOptions aOpt;
116 74 : aOpt.algorithmType = SearchAlgorithms_ABSOLUTE;
117 74 : aOpt.searchFlag = SearchFlags::ALL_IGNORE_CASE;
118 : //aOpt.Locale = ???;
119 74 : setOptions( aOpt );
120 74 : }
121 :
122 186 : TextSearch::~TextSearch()
123 : {
124 62 : delete pRegexMatcher;
125 62 : delete pWLD;
126 62 : delete pJumpTable;
127 62 : delete pJumpTable2;
128 124 : }
129 :
130 146 : void TextSearch::setOptions( const SearchOptions& rOptions ) throw( RuntimeException, std::exception )
131 : {
132 146 : aSrchPara = rOptions;
133 :
134 146 : delete pRegexMatcher, pRegexMatcher = NULL;
135 146 : delete pWLD, pWLD = 0;
136 146 : delete pJumpTable, pJumpTable = 0;
137 146 : delete pJumpTable2, pJumpTable2 = 0;
138 :
139 : // Create Transliteration class
140 146 : if( isSimpleTrans( aSrchPara.transliterateFlags) )
141 : {
142 62 : if( !xTranslit.is() )
143 62 : xTranslit.set( Transliteration::create( m_xContext ) );
144 62 : xTranslit->loadModule(
145 62 : (TransliterationModules) maskSimpleTrans( aSrchPara.transliterateFlags),
146 124 : aSrchPara.Locale);
147 : }
148 84 : else if( xTranslit.is() )
149 0 : xTranslit = 0;
150 :
151 : // Create Transliteration for 2<->1, 2<->2 transliteration
152 146 : if ( isComplexTrans( aSrchPara.transliterateFlags) )
153 : {
154 0 : if( !xTranslit2.is() )
155 0 : xTranslit2.set( Transliteration::create( m_xContext ) );
156 : // Load transliteration module
157 0 : xTranslit2->loadModule(
158 0 : (TransliterationModules) maskComplexTrans( aSrchPara.transliterateFlags),
159 0 : aSrchPara.Locale);
160 : }
161 :
162 146 : if ( !xBreak.is() )
163 74 : xBreak = com::sun::star::i18n::BreakIterator::create( m_xContext );
164 :
165 146 : sSrchStr = aSrchPara.searchString;
166 :
167 : // Transliterate search string.
168 146 : if (aSrchPara.algorithmType == SearchAlgorithms_REGEXP)
169 : {
170 19 : if (isSimpleRegexTrans( aSrchPara.transliterateFlags))
171 : {
172 2 : if (maskSimpleRegexTrans( aSrchPara.transliterateFlags) !=
173 1 : maskSimpleTrans( aSrchPara.transliterateFlags))
174 : {
175 : com::sun::star::uno::Reference< XExtendedTransliteration > xTranslitPattern(
176 1 : Transliteration::create( m_xContext ));
177 1 : if (xTranslitPattern.is())
178 : {
179 1 : xTranslitPattern->loadModule(
180 1 : (TransliterationModules) maskSimpleRegexTrans( aSrchPara.transliterateFlags),
181 2 : aSrchPara.Locale);
182 3 : sSrchStr = xTranslitPattern->transliterateString2String(
183 2 : aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
184 1 : }
185 : }
186 : else
187 : {
188 0 : if (xTranslit.is())
189 0 : sSrchStr = xTranslit->transliterateString2String(
190 0 : aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
191 : }
192 : // xTranslit2 complex transliterated sSrchStr2 is not used in
193 : // regex, see TextSearch::searchForward() and
194 : // TextSearch::searchBackward()
195 : }
196 : }
197 : else
198 : {
199 127 : if ( xTranslit.is() && isSimpleTrans( aSrchPara.transliterateFlags) )
200 159 : sSrchStr = xTranslit->transliterateString2String(
201 106 : aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
202 :
203 127 : if ( xTranslit2.is() && isComplexTrans( aSrchPara.transliterateFlags) )
204 0 : sSrchStr2 = xTranslit2->transliterateString2String(
205 0 : aSrchPara.searchString, 0, aSrchPara.searchString.getLength());
206 : }
207 :
208 : // When start or end of search string is a complex script type, we need to
209 : // make sure the result boundary is not located in the middle of cell.
210 146 : checkCTLStart = (xBreak.is() && (xBreak->getScriptType(sSrchStr, 0) ==
211 146 : ScriptType::COMPLEX));
212 292 : checkCTLEnd = (xBreak.is() && (xBreak->getScriptType(sSrchStr,
213 292 : sSrchStr.getLength()-1) == ScriptType::COMPLEX));
214 :
215 146 : switch( aSrchPara.algorithmType)
216 : {
217 : case SearchAlgorithms_REGEXP:
218 19 : fnForward = &TextSearch::RESrchFrwrd;
219 19 : fnBackward = &TextSearch::RESrchBkwrd;
220 19 : RESrchPrepare( aSrchPara);
221 19 : break;
222 :
223 : case SearchAlgorithms_APPROXIMATE:
224 0 : fnForward = &TextSearch::ApproxSrchFrwrd;
225 0 : fnBackward = &TextSearch::ApproxSrchBkwrd;
226 :
227 0 : pWLD = new WLevDistance( sSrchStr.getStr(), aSrchPara.changedChars,
228 : aSrchPara.insertedChars, aSrchPara.deletedChars,
229 0 : 0 != (SearchFlags::LEV_RELAXED & aSrchPara.searchFlag ) );
230 :
231 0 : nLimit = pWLD->GetLimit();
232 0 : break;
233 :
234 : default:
235 127 : fnForward = &TextSearch::NSrchFrwrd;
236 127 : fnBackward = &TextSearch::NSrchBkwrd;
237 127 : break;
238 : }
239 146 : }
240 :
241 12 : sal_Int32 FindPosInSeq_Impl( const Sequence <sal_Int32>& rOff, sal_Int32 nPos )
242 : {
243 12 : sal_Int32 nRet = 0, nEnd = rOff.getLength();
244 12 : while( nRet < nEnd && nPos > rOff[ nRet ] ) ++nRet;
245 12 : return nRet;
246 : }
247 :
248 0 : bool TextSearch::isCellStart(const OUString& searchStr, sal_Int32 nPos)
249 : throw( RuntimeException )
250 : {
251 : sal_Int32 nDone;
252 0 : return nPos == xBreak->previousCharacters(searchStr, nPos+1,
253 0 : aSrchPara.Locale, CharacterIteratorMode::SKIPCELL, 1, nDone);
254 : }
255 :
256 1109 : SearchResult TextSearch::searchForward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
257 : throw( RuntimeException, std::exception )
258 : {
259 1109 : SearchResult sres;
260 :
261 2218 : OUString in_str(searchStr);
262 :
263 1109 : bUsePrimarySrchStr = true;
264 :
265 1109 : if ( xTranslit.is() )
266 : {
267 : // apply normal transliteration (1<->1, 1<->0)
268 358 : com::sun::star::uno::Sequence<sal_Int32> offset(endPos - startPos);
269 358 : in_str = xTranslit->transliterate( searchStr, startPos, endPos - startPos, offset );
270 :
271 : // JP 20.6.2001: also the start and end positions must be corrected!
272 : sal_Int32 newStartPos =
273 358 : (startPos == 0) ? 0 : FindPosInSeq_Impl( offset, startPos );
274 :
275 358 : sal_Int32 newEndPos = (endPos < searchStr.getLength())
276 : ? FindPosInSeq_Impl( offset, endPos )
277 358 : : in_str.getLength();
278 :
279 358 : sal_Int32 nExtraOffset = 0;
280 358 : if (pRegexMatcher && startPos > 0)
281 : {
282 : // avoid matching ^ here - in_str omits a prefix of the searchStr
283 : // this is a really lame way to do it, but ICU only offers
284 : // useAnchoringBounds() to disable *both* bounds but what is needed
285 : // here is to disable only one bound and respect the other
286 2 : in_str = "X" + in_str;
287 2 : nExtraOffset = 1;
288 2 : newStartPos += nExtraOffset;
289 2 : newEndPos += nExtraOffset;
290 : }
291 :
292 358 : sres = (this->*fnForward)( in_str, newStartPos, newEndPos );
293 :
294 : // Map offsets back to untransliterated string.
295 358 : const sal_Int32 nOffsets = offset.getLength();
296 358 : if (nOffsets)
297 : {
298 : // For regex nGroups is the number of groups+1 with group 0 being
299 : // the entire match.
300 356 : const sal_Int32 nGroups = sres.startOffset.getLength();
301 436 : for ( sal_Int32 k = 0; k < nGroups; k++ )
302 : {
303 80 : const sal_Int32 nStart = sres.startOffset[k] - nExtraOffset;
304 80 : if (startPos > 0 || nStart > 0)
305 16 : sres.startOffset[k] = (nStart < nOffsets ? offset[nStart] : (offset[nOffsets - 1] + 1));
306 : // JP 20.6.2001: end is ever exclusive and then don't return
307 : // the position of the next character - return the
308 : // next position behind the last found character!
309 : // "a b c" find "b" must return 2,3 and not 2,4!!!
310 80 : const sal_Int32 nStop = sres.endOffset[k] - nExtraOffset;
311 80 : if (startPos > 0 || nStop > 0)
312 80 : sres.endOffset[k] = offset[(nStop <= nOffsets ? nStop : nOffsets) - 1] + 1;
313 : }
314 358 : }
315 : }
316 : else
317 : {
318 751 : sres = (this->*fnForward)( in_str, startPos, endPos );
319 : }
320 :
321 1109 : if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP)
322 : {
323 0 : SearchResult sres2;
324 :
325 0 : in_str = OUString(searchStr);
326 0 : com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
327 :
328 0 : in_str = xTranslit2->transliterate( searchStr, 0, in_str.getLength(), offset );
329 :
330 0 : if( startPos )
331 0 : startPos = FindPosInSeq_Impl( offset, startPos );
332 :
333 0 : if( endPos < searchStr.getLength() )
334 0 : endPos = FindPosInSeq_Impl( offset, endPos );
335 : else
336 0 : endPos = in_str.getLength();
337 :
338 0 : bUsePrimarySrchStr = false;
339 0 : sres2 = (this->*fnForward)( in_str, startPos, endPos );
340 :
341 0 : for ( int k = 0; k < sres2.startOffset.getLength(); k++ )
342 : {
343 0 : if (sres2.startOffset[k])
344 0 : sres2.startOffset[k] = offset[sres2.startOffset[k]-1] + 1;
345 0 : if (sres2.endOffset[k])
346 0 : sres2.endOffset[k] = offset[sres2.endOffset[k]-1] + 1;
347 : }
348 :
349 : // pick first and long one
350 0 : if ( sres.subRegExpressions == 0)
351 0 : return sres2;
352 0 : if ( sres2.subRegExpressions == 1)
353 : {
354 0 : if ( sres.startOffset[0] > sres2.startOffset[0])
355 0 : return sres2;
356 0 : else if ( sres.startOffset[0] == sres2.startOffset[0] &&
357 0 : sres.endOffset[0] < sres2.endOffset[0])
358 0 : return sres2;
359 0 : }
360 : }
361 :
362 2218 : return sres;
363 : }
364 :
365 12 : SearchResult TextSearch::searchBackward( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
366 : throw(RuntimeException, std::exception)
367 : {
368 12 : SearchResult sres;
369 :
370 24 : OUString in_str(searchStr);
371 :
372 12 : bUsePrimarySrchStr = true;
373 :
374 12 : if ( xTranslit.is() )
375 : {
376 : // apply only simple 1<->1 transliteration here
377 11 : com::sun::star::uno::Sequence<sal_Int32> offset(startPos - endPos);
378 11 : in_str = xTranslit->transliterate( searchStr, endPos, startPos - endPos, offset );
379 :
380 : // JP 20.6.2001: also the start and end positions must be corrected!
381 11 : sal_Int32 const newStartPos = (startPos < searchStr.getLength())
382 : ? FindPosInSeq_Impl( offset, startPos )
383 11 : : in_str.getLength();
384 :
385 : sal_Int32 const newEndPos =
386 11 : (endPos == 0) ? 0 : FindPosInSeq_Impl( offset, endPos );
387 :
388 : // TODO: this would need nExtraOffset handling to avoid $ matching
389 : // if (pRegexMatcher && startPos < searchStr.getLength())
390 : // but that appears to be impossible with ICU regex
391 :
392 11 : sres = (this->*fnBackward)( in_str, newStartPos, newEndPos );
393 :
394 : // Map offsets back to untransliterated string.
395 11 : const sal_Int32 nOffsets = offset.getLength();
396 11 : if (nOffsets)
397 : {
398 : // For regex nGroups is the number of groups+1 with group 0 being
399 : // the entire match.
400 11 : const sal_Int32 nGroups = sres.startOffset.getLength();
401 20 : for ( sal_Int32 k = 0; k < nGroups; k++ )
402 : {
403 9 : const sal_Int32 nStart = sres.startOffset[k];
404 9 : if (endPos > 0 || nStart > 0)
405 9 : sres.startOffset[k] = offset[(nStart <= nOffsets ? nStart : nOffsets) - 1] + 1;
406 : // JP 20.6.2001: end is ever exclusive and then don't return
407 : // the position of the next character - return the
408 : // next position behind the last found character!
409 : // "a b c" find "b" must return 2,3 and not 2,4!!!
410 9 : const sal_Int32 nStop = sres.endOffset[k];
411 9 : if (endPos > 0 || nStop > 0)
412 5 : sres.endOffset[k] = (nStop < nOffsets ? offset[nStop] : (offset[nOffsets - 1] + 1));
413 : }
414 11 : }
415 : }
416 : else
417 : {
418 1 : sres = (this->*fnBackward)( in_str, startPos, endPos );
419 : }
420 :
421 12 : if ( xTranslit2.is() && aSrchPara.algorithmType != SearchAlgorithms_REGEXP )
422 : {
423 0 : SearchResult sres2;
424 :
425 0 : in_str = OUString(searchStr);
426 0 : com::sun::star::uno::Sequence <sal_Int32> offset( in_str.getLength());
427 :
428 0 : in_str = xTranslit2->transliterate(searchStr, 0, in_str.getLength(), offset);
429 :
430 0 : if( startPos < searchStr.getLength() )
431 0 : startPos = FindPosInSeq_Impl( offset, startPos );
432 : else
433 0 : startPos = in_str.getLength();
434 :
435 0 : if( endPos )
436 0 : endPos = FindPosInSeq_Impl( offset, endPos );
437 :
438 0 : bUsePrimarySrchStr = false;
439 0 : sres2 = (this->*fnBackward)( in_str, startPos, endPos );
440 :
441 0 : for( int k = 0; k < sres2.startOffset.getLength(); k++ )
442 : {
443 0 : if (sres2.startOffset[k])
444 0 : sres2.startOffset[k] = offset[sres2.startOffset[k]-1]+1;
445 0 : if (sres2.endOffset[k])
446 0 : sres2.endOffset[k] = offset[sres2.endOffset[k]-1]+1;
447 : }
448 :
449 : // pick last and long one
450 0 : if ( sres.subRegExpressions == 0 )
451 0 : return sres2;
452 0 : if ( sres2.subRegExpressions == 1 )
453 : {
454 0 : if ( sres.startOffset[0] < sres2.startOffset[0] )
455 0 : return sres2;
456 0 : if ( sres.startOffset[0] == sres2.startOffset[0] &&
457 0 : sres.endOffset[0] > sres2.endOffset[0] )
458 0 : return sres2;
459 0 : }
460 : }
461 :
462 24 : return sres;
463 : }
464 :
465 :
466 :
467 0 : bool TextSearch::IsDelimiter( const OUString& rStr, sal_Int32 nPos ) const
468 : {
469 0 : bool bRet = true;
470 0 : if( '\x7f' != rStr[nPos])
471 : {
472 0 : if ( !xCharClass.is() )
473 0 : xCharClass = CharacterClassification::create( m_xContext );
474 0 : sal_Int32 nCType = xCharClass->getCharacterType( rStr, nPos,
475 0 : aSrchPara.Locale );
476 0 : if( 0 != (( KCharacterType::DIGIT | KCharacterType::ALPHA |
477 0 : KCharacterType::LETTER ) & nCType ) )
478 0 : bRet = false;
479 : }
480 0 : return bRet;
481 : }
482 :
483 : // --------- helper methods for Boyer-Moore like text searching ----------
484 : // TODO: use ICU's regex UREGEX_LITERAL mode instead when it becomes available
485 :
486 130 : void TextSearch::MakeForwardTab()
487 : {
488 : // create the jumptable for the search text
489 130 : if( pJumpTable )
490 : {
491 85 : if( bIsForwardTab )
492 215 : return ; // the jumpTable is ok
493 0 : delete pJumpTable;
494 : }
495 45 : bIsForwardTab = true;
496 :
497 45 : sal_Int32 n, nLen = sSrchStr.getLength();
498 45 : pJumpTable = new TextSearchJumpTable;
499 :
500 299 : for( n = 0; n < nLen - 1; ++n )
501 : {
502 254 : sal_Unicode cCh = sSrchStr[n];
503 254 : sal_Int32 nDiff = nLen - n - 1;
504 254 : TextSearchJumpTable::value_type aEntry( cCh, nDiff );
505 :
506 : ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
507 254 : pJumpTable->insert( aEntry );
508 254 : if ( !aPair.second )
509 67 : (*(aPair.first)).second = nDiff;
510 : }
511 : }
512 :
513 0 : void TextSearch::MakeForwardTab2()
514 : {
515 : // create the jumptable for the search text
516 0 : if( pJumpTable2 )
517 : {
518 0 : if( bIsForwardTab )
519 0 : return ; // the jumpTable is ok
520 0 : delete pJumpTable2;
521 : }
522 0 : bIsForwardTab = true;
523 :
524 0 : sal_Int32 n, nLen = sSrchStr2.getLength();
525 0 : pJumpTable2 = new TextSearchJumpTable;
526 :
527 0 : for( n = 0; n < nLen - 1; ++n )
528 : {
529 0 : sal_Unicode cCh = sSrchStr2[n];
530 0 : sal_Int32 nDiff = nLen - n - 1;
531 :
532 0 : TextSearchJumpTable::value_type aEntry( cCh, nDiff );
533 : ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
534 0 : pJumpTable2->insert( aEntry );
535 0 : if ( !aPair.second )
536 0 : (*(aPair.first)).second = nDiff;
537 : }
538 : }
539 :
540 7 : void TextSearch::MakeBackwardTab()
541 : {
542 : // create the jumptable for the search text
543 7 : if( pJumpTable )
544 : {
545 3 : if( !bIsForwardTab )
546 9 : return ; // the jumpTable is ok
547 1 : delete pJumpTable;
548 : }
549 5 : bIsForwardTab = false;
550 :
551 5 : sal_Int32 n, nLen = sSrchStr.getLength();
552 5 : pJumpTable = new TextSearchJumpTable;
553 :
554 25 : for( n = nLen-1; n > 0; --n )
555 : {
556 20 : sal_Unicode cCh = sSrchStr[n];
557 20 : TextSearchJumpTable::value_type aEntry( cCh, n );
558 : ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
559 20 : pJumpTable->insert( aEntry );
560 20 : if ( !aPair.second )
561 0 : (*(aPair.first)).second = n;
562 : }
563 : }
564 :
565 0 : void TextSearch::MakeBackwardTab2()
566 : {
567 : // create the jumptable for the search text
568 0 : if( pJumpTable2 )
569 : {
570 0 : if( !bIsForwardTab )
571 0 : return ; // the jumpTable is ok
572 0 : delete pJumpTable2;
573 : }
574 0 : bIsForwardTab = false;
575 :
576 0 : sal_Int32 n, nLen = sSrchStr2.getLength();
577 0 : pJumpTable2 = new TextSearchJumpTable;
578 :
579 0 : for( n = nLen-1; n > 0; --n )
580 : {
581 0 : sal_Unicode cCh = sSrchStr2[n];
582 0 : TextSearchJumpTable::value_type aEntry( cCh, n );
583 : ::std::pair< TextSearchJumpTable::iterator, bool > aPair =
584 0 : pJumpTable2->insert( aEntry );
585 0 : if ( !aPair.second )
586 0 : (*(aPair.first)).second = n;
587 : }
588 : }
589 :
590 326 : sal_Int32 TextSearch::GetDiff( const sal_Unicode cChr ) const
591 : {
592 : TextSearchJumpTable *pJump;
593 326 : OUString sSearchKey;
594 :
595 326 : if ( bUsePrimarySrchStr ) {
596 326 : pJump = pJumpTable;
597 326 : sSearchKey = sSrchStr;
598 : } else {
599 0 : pJump = pJumpTable2;
600 0 : sSearchKey = sSrchStr2;
601 : }
602 :
603 326 : TextSearchJumpTable::const_iterator iLook = pJump->find( cChr );
604 326 : if ( iLook == pJump->end() )
605 262 : return sSearchKey.getLength();
606 64 : return (*iLook).second;
607 : }
608 :
609 :
610 319 : SearchResult TextSearch::NSrchFrwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
611 : throw(RuntimeException)
612 : {
613 319 : SearchResult aRet;
614 319 : aRet.subRegExpressions = 0;
615 :
616 638 : OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
617 :
618 638 : OUString aStr( searchStr );
619 319 : sal_Int32 nSuchIdx = aStr.getLength();
620 319 : sal_Int32 nEnde = endPos;
621 319 : if( !nSuchIdx || !sSearchKey.getLength() || sSearchKey.getLength() > nSuchIdx )
622 189 : return aRet;
623 :
624 :
625 130 : if( nEnde < sSearchKey.getLength() ) // position inside the search region ?
626 0 : return aRet;
627 :
628 130 : nEnde -= sSearchKey.getLength();
629 :
630 130 : if (bUsePrimarySrchStr)
631 130 : MakeForwardTab(); // create the jumptable
632 : else
633 0 : MakeForwardTab2();
634 :
635 427 : for (sal_Int32 nCmpIdx = startPos; // start position for the search
636 : nCmpIdx <= nEnde;
637 297 : nCmpIdx += GetDiff( aStr[nCmpIdx + sSearchKey.getLength()-1]))
638 : {
639 : // if the match would be the completed cells, skip it.
640 357 : if ( (checkCTLStart && !isCellStart( aStr, nCmpIdx )) || (checkCTLEnd
641 0 : && !isCellStart( aStr, nCmpIdx + sSearchKey.getLength())) )
642 0 : continue;
643 :
644 357 : nSuchIdx = sSearchKey.getLength() - 1;
645 1170 : while( nSuchIdx >= 0 && sSearchKey[nSuchIdx] == aStr[nCmpIdx + nSuchIdx])
646 : {
647 516 : if( nSuchIdx == 0 )
648 : {
649 60 : if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
650 : {
651 0 : sal_Int32 nFndEnd = nCmpIdx + sSearchKey.getLength();
652 0 : bool bAtStart = !nCmpIdx;
653 0 : bool bAtEnd = nFndEnd == endPos;
654 0 : bool bDelimBefore = bAtStart || IsDelimiter( aStr, nCmpIdx-1 );
655 0 : bool bDelimBehind = bAtEnd || IsDelimiter( aStr, nFndEnd );
656 : // * 1 -> only one word in the paragraph
657 : // * 2 -> at begin of paragraph
658 : // * 3 -> at end of paragraph
659 : // * 4 -> inside the paragraph
660 0 : if( !( ( bAtStart && bAtEnd ) || // 1
661 0 : ( bAtStart && bDelimBehind ) || // 2
662 0 : ( bAtEnd && bDelimBefore ) || // 3
663 0 : ( bDelimBefore && bDelimBehind ))) // 4
664 : break;
665 : }
666 :
667 60 : aRet.subRegExpressions = 1;
668 60 : aRet.startOffset.realloc( 1 );
669 60 : aRet.startOffset[ 0 ] = nCmpIdx;
670 60 : aRet.endOffset.realloc( 1 );
671 60 : aRet.endOffset[ 0 ] = nCmpIdx + sSearchKey.getLength();
672 :
673 60 : return aRet;
674 : }
675 : else
676 456 : nSuchIdx--;
677 : }
678 : }
679 70 : return aRet;
680 : }
681 :
682 7 : SearchResult TextSearch::NSrchBkwrd( const OUString& searchStr, sal_Int32 startPos, sal_Int32 endPos )
683 : throw(RuntimeException)
684 : {
685 7 : SearchResult aRet;
686 7 : aRet.subRegExpressions = 0;
687 :
688 14 : OUString sSearchKey = bUsePrimarySrchStr ? sSrchStr : sSrchStr2;
689 :
690 14 : OUString aStr( searchStr );
691 7 : sal_Int32 nSuchIdx = aStr.getLength();
692 7 : sal_Int32 nEnde = endPos;
693 7 : if( nSuchIdx == 0 || sSearchKey.isEmpty() || sSearchKey.getLength() > nSuchIdx)
694 0 : return aRet;
695 :
696 7 : if (bUsePrimarySrchStr)
697 7 : MakeBackwardTab(); // create the jumptable
698 : else
699 0 : MakeBackwardTab2();
700 :
701 7 : if( nEnde == nSuchIdx ) // end position for the search
702 0 : nEnde = sSearchKey.getLength();
703 : else
704 7 : nEnde += sSearchKey.getLength();
705 :
706 7 : sal_Int32 nCmpIdx = startPos; // start position for the search
707 :
708 43 : while (nCmpIdx >= nEnde)
709 : {
710 : // if the match would be the completed cells, skip it.
711 68 : if ( (!checkCTLStart || isCellStart( aStr, nCmpIdx -
712 68 : sSearchKey.getLength() )) && (!checkCTLEnd ||
713 0 : isCellStart( aStr, nCmpIdx)))
714 : {
715 34 : nSuchIdx = 0;
716 149 : while( nSuchIdx < sSearchKey.getLength() && sSearchKey[nSuchIdx] ==
717 55 : aStr[nCmpIdx + nSuchIdx - sSearchKey.getLength()] )
718 26 : nSuchIdx++;
719 34 : if( nSuchIdx >= sSearchKey.getLength() )
720 : {
721 5 : if( SearchFlags::NORM_WORD_ONLY & aSrchPara.searchFlag )
722 : {
723 0 : sal_Int32 nFndStt = nCmpIdx - sSearchKey.getLength();
724 0 : bool bAtStart = !nFndStt;
725 0 : bool bAtEnd = nCmpIdx == startPos;
726 0 : bool bDelimBehind = bAtEnd || IsDelimiter( aStr, nCmpIdx );
727 0 : bool bDelimBefore = bAtStart || // begin of paragraph
728 0 : IsDelimiter( aStr, nFndStt-1 );
729 : // * 1 -> only one word in the paragraph
730 : // * 2 -> at begin of paragraph
731 : // * 3 -> at end of paragraph
732 : // * 4 -> inside the paragraph
733 0 : if( ( bAtStart && bAtEnd ) || // 1
734 0 : ( bAtStart && bDelimBehind ) || // 2
735 0 : ( bAtEnd && bDelimBefore ) || // 3
736 0 : ( bDelimBefore && bDelimBehind )) // 4
737 : {
738 0 : aRet.subRegExpressions = 1;
739 0 : aRet.startOffset.realloc( 1 );
740 0 : aRet.startOffset[ 0 ] = nCmpIdx;
741 0 : aRet.endOffset.realloc( 1 );
742 0 : aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
743 0 : return aRet;
744 : }
745 : }
746 : else
747 : {
748 5 : aRet.subRegExpressions = 1;
749 5 : aRet.startOffset.realloc( 1 );
750 5 : aRet.startOffset[ 0 ] = nCmpIdx;
751 5 : aRet.endOffset.realloc( 1 );
752 5 : aRet.endOffset[ 0 ] = nCmpIdx - sSearchKey.getLength();
753 5 : return aRet;
754 : }
755 : }
756 : }
757 29 : nSuchIdx = GetDiff( aStr[nCmpIdx - sSearchKey.getLength()] );
758 29 : if( nCmpIdx < nSuchIdx )
759 0 : return aRet;
760 29 : nCmpIdx -= nSuchIdx;
761 : }
762 2 : return aRet;
763 : }
764 :
765 19 : void TextSearch::RESrchPrepare( const ::com::sun::star::util::SearchOptions& rOptions)
766 : {
767 : // select the transliterated pattern string
768 : const OUString& rPatternStr =
769 19 : (isSimpleTrans( rOptions.transliterateFlags) ? sSrchStr
770 19 : : (isComplexTrans( rOptions.transliterateFlags) ? sSrchStr2 : rOptions.searchString));
771 :
772 19 : sal_uInt32 nIcuSearchFlags = UREGEX_UWORD; // request UAX#29 unicode capability
773 : // map com::sun::star::util::SearchFlags to ICU uregex.h flags
774 : // TODO: REG_EXTENDED, REG_NOT_BEGINOFLINE, REG_NOT_ENDOFLINE
775 : // REG_NEWLINE is neither properly defined nor used anywhere => not implemented
776 : // REG_NOSUB is not used anywhere => not implemented
777 : // NORM_WORD_ONLY is only used for SearchAlgorithm==Absolute
778 : // LEV_RELAXED is only used for SearchAlgorithm==Approximate
779 : // Note that the search flag ALL_IGNORE_CASE is deprecated in UNO
780 : // probably because the transliteration flag IGNORE_CASE handles it as well.
781 19 : if( (rOptions.searchFlag & com::sun::star::util::SearchFlags::ALL_IGNORE_CASE) != 0
782 13 : || (rOptions.transliterateFlags & TransliterationModules_IGNORE_CASE) != 0)
783 10 : nIcuSearchFlags |= UREGEX_CASE_INSENSITIVE;
784 19 : UErrorCode nIcuErr = U_ZERO_ERROR;
785 : // assumption: transliteration didn't mangle regexp control chars
786 19 : IcuUniString aIcuSearchPatStr( reinterpret_cast<const UChar*>(rPatternStr.getStr()), rPatternStr.getLength());
787 : #ifndef DISABLE_WORDBOUND_EMULATION
788 : // for conveniance specific syntax elements of the old regex engine are emulated
789 : // - by replacing \< with "word-break followed by a look-ahead word-char"
790 19 : static const IcuUniString aChevronPatternB( "\\\\<", -1, IcuUniString::kInvariant);
791 19 : static const IcuUniString aChevronReplaceB( "\\\\b(?=\\\\w)", -1, IcuUniString::kInvariant);
792 19 : static RegexMatcher aChevronMatcherB( aChevronPatternB, 0, nIcuErr);
793 19 : aChevronMatcherB.reset( aIcuSearchPatStr);
794 19 : aIcuSearchPatStr = aChevronMatcherB.replaceAll( aChevronReplaceB, nIcuErr);
795 19 : aChevronMatcherB.reset();
796 : // - by replacing \> with "look-behind word-char followed by a word-break"
797 19 : static const IcuUniString aChevronPatternE( "\\\\>", -1, IcuUniString::kInvariant);
798 19 : static const IcuUniString aChevronReplaceE( "(?<=\\\\w)\\\\b", -1, IcuUniString::kInvariant);
799 19 : static RegexMatcher aChevronMatcherE( aChevronPatternE, 0, nIcuErr);
800 19 : aChevronMatcherE.reset( aIcuSearchPatStr);
801 19 : aIcuSearchPatStr = aChevronMatcherE.replaceAll( aChevronReplaceE, nIcuErr);
802 19 : aChevronMatcherE.reset();
803 : #endif
804 19 : pRegexMatcher = new RegexMatcher( aIcuSearchPatStr, nIcuSearchFlags, nIcuErr);
805 19 : if (nIcuErr)
806 : {
807 : SAL_INFO( "i18npool", "TextSearch::RESrchPrepare UErrorCode " << nIcuErr);
808 0 : delete pRegexMatcher;
809 0 : pRegexMatcher = NULL;
810 : }
811 : else
812 : {
813 : // Pathological patterns may result in exponential run time making the
814 : // application appear to be frozen. Limit that. Documentation for this
815 : // call says
816 : // https://ssl.icu-project.org/apiref/icu4c/classicu_1_1RegexMatcher.html#a6ebcfcab4fe6a38678c0291643a03a00
817 : // "The units of the limit are steps of the match engine.
818 : // Correspondence with actual processor time will depend on the speed
819 : // of the processor and the details of the specific pattern, but will
820 : // typically be on the order of milliseconds."
821 : // Just what is a good value? 42 is always an answer ... the 23 enigma
822 : // as well.. which on the dev's machine is roughly 50 seconds with the
823 : // pattern of fdo#70627.
824 : /* TODO: make this a configuration settable value and possibly take
825 : * complexity of expression into account and maybe even length of text
826 : * to be matched; currently (2013-11-25) that is at most one 64k
827 : * paragraph per RESrchFrwrd()/RESrchBkwrd() call. */
828 19 : pRegexMatcher->setTimeLimit( 23*1000, nIcuErr);
829 19 : }
830 19 : }
831 :
832 :
833 :
834 801 : static bool lcl_findRegex( RegexMatcher * pRegexMatcher, sal_Int32 nStartPos, UErrorCode & rIcuErr )
835 : {
836 801 : if (!pRegexMatcher->find( nStartPos, rIcuErr))
837 : {
838 : /* TODO: future versions could pass the UErrorCode or translations
839 : * thereof to the caller, for example to inform the user of
840 : * U_REGEX_TIME_OUT. The strange thing though is that an error is set
841 : * only after the second call that returns immediately and not if
842 : * timeout occurred on the first call?!? */
843 : SAL_INFO( "i18npool", "lcl_findRegex UErrorCode " << rIcuErr);
844 138 : return false;
845 : }
846 663 : return true;
847 : }
848 :
849 790 : SearchResult TextSearch::RESrchFrwrd( const OUString& searchStr,
850 : sal_Int32 startPos, sal_Int32 endPos )
851 : throw(RuntimeException)
852 : {
853 790 : SearchResult aRet;
854 790 : aRet.subRegExpressions = 0;
855 790 : if( !pRegexMatcher)
856 0 : return aRet;
857 :
858 790 : if( endPos > searchStr.getLength())
859 0 : endPos = searchStr.getLength();
860 :
861 : // use the ICU RegexMatcher to find the matches
862 790 : UErrorCode nIcuErr = U_ZERO_ERROR;
863 1580 : const IcuUniString aSearchTargetStr( reinterpret_cast<const UChar*>(searchStr.getStr()), endPos);
864 790 : pRegexMatcher->reset( aSearchTargetStr);
865 : // search until there is a valid match
866 : for(;;)
867 : {
868 790 : if (!lcl_findRegex( pRegexMatcher, startPos, nIcuErr))
869 137 : return aRet;
870 :
871 : // #i118887# ignore zero-length matches e.g. "a*" in "bc"
872 653 : int nStartOfs = pRegexMatcher->start( nIcuErr);
873 653 : int nEndOfs = pRegexMatcher->end( nIcuErr);
874 653 : if( nStartOfs < nEndOfs)
875 652 : break;
876 : // If the zero-length match is behind the string, do not match it again
877 : // and again until startPos reaches there. A match behind the string is
878 : // a "$" anchor.
879 1 : if (nStartOfs == endPos)
880 1 : break;
881 : // try at next position if there was a zero-length match
882 0 : if( ++startPos >= endPos)
883 0 : return aRet;
884 0 : }
885 :
886 : // extract the result of the search
887 653 : const int nGroupCount = pRegexMatcher->groupCount();
888 653 : aRet.subRegExpressions = nGroupCount + 1;
889 653 : aRet.startOffset.realloc( aRet.subRegExpressions);
890 653 : aRet.endOffset.realloc( aRet.subRegExpressions);
891 653 : aRet.startOffset[0] = pRegexMatcher->start( nIcuErr);
892 653 : aRet.endOffset[0] = pRegexMatcher->end( nIcuErr);
893 655 : for( int i = 1; i <= nGroupCount; ++i) {
894 2 : aRet.startOffset[i] = pRegexMatcher->start( i, nIcuErr);
895 2 : aRet.endOffset[i] = pRegexMatcher->end( i, nIcuErr);
896 : }
897 :
898 653 : return aRet;
899 : }
900 :
901 5 : SearchResult TextSearch::RESrchBkwrd( const OUString& searchStr,
902 : sal_Int32 startPos, sal_Int32 endPos )
903 : throw(RuntimeException)
904 : {
905 : // NOTE: for backwards search callers provide startPos/endPos inverted!
906 5 : SearchResult aRet;
907 5 : aRet.subRegExpressions = 0;
908 5 : if( !pRegexMatcher)
909 0 : return aRet;
910 :
911 5 : if( startPos > searchStr.getLength())
912 0 : startPos = searchStr.getLength();
913 :
914 : // use the ICU RegexMatcher to find the matches
915 : // TODO: use ICU's backward searching once it becomes available
916 : // as its replacement using forward search is not as good as the real thing
917 5 : UErrorCode nIcuErr = U_ZERO_ERROR;
918 10 : const IcuUniString aSearchTargetStr( reinterpret_cast<const UChar*>(searchStr.getStr()), startPos);
919 5 : pRegexMatcher->reset( aSearchTargetStr);
920 5 : if (!lcl_findRegex( pRegexMatcher, endPos, nIcuErr))
921 0 : return aRet;
922 :
923 : // find the last match
924 5 : int nLastPos = 0;
925 5 : int nFoundEnd = 0;
926 5 : int nGoodPos = 0, nGoodEnd = 0;
927 5 : bool bFirst = true;
928 1 : do {
929 5 : nLastPos = pRegexMatcher->start( nIcuErr);
930 5 : nFoundEnd = pRegexMatcher->end( nIcuErr);
931 5 : if (nLastPos < nFoundEnd)
932 : {
933 : // remember last non-zero-length match
934 5 : nGoodPos = nLastPos;
935 5 : nGoodEnd = nFoundEnd;
936 : }
937 5 : if( nFoundEnd >= startPos)
938 4 : break;
939 1 : bFirst = false;
940 1 : if( nFoundEnd == nLastPos)
941 0 : ++nFoundEnd;
942 1 : } while( lcl_findRegex( pRegexMatcher, nFoundEnd, nIcuErr));
943 :
944 : // Ignore all zero-length matches except "$" anchor on first match.
945 5 : if (nGoodPos == nGoodEnd)
946 : {
947 0 : if (bFirst && nLastPos == startPos)
948 0 : nGoodPos = nLastPos;
949 : else
950 0 : return aRet;
951 : }
952 :
953 : // find last match again to get its details
954 5 : lcl_findRegex( pRegexMatcher, nGoodPos, nIcuErr);
955 :
956 : // fill in the details of the last match
957 5 : const int nGroupCount = pRegexMatcher->groupCount();
958 5 : aRet.subRegExpressions = nGroupCount + 1;
959 5 : aRet.startOffset.realloc( aRet.subRegExpressions);
960 5 : aRet.endOffset.realloc( aRet.subRegExpressions);
961 : // NOTE: existing users of backward search seem to expect startOfs/endOfs being inverted!
962 5 : aRet.startOffset[0] = pRegexMatcher->end( nIcuErr);
963 5 : aRet.endOffset[0] = pRegexMatcher->start( nIcuErr);
964 7 : for( int i = 1; i <= nGroupCount; ++i) {
965 2 : aRet.startOffset[i] = pRegexMatcher->end( i, nIcuErr);
966 2 : aRet.endOffset[i] = pRegexMatcher->start( i, nIcuErr);
967 : }
968 :
969 5 : return aRet;
970 : }
971 :
972 :
973 :
974 : // search for words phonetically
975 0 : SearchResult TextSearch::ApproxSrchFrwrd( const OUString& searchStr,
976 : sal_Int32 startPos, sal_Int32 endPos )
977 : throw(RuntimeException)
978 : {
979 0 : SearchResult aRet;
980 0 : aRet.subRegExpressions = 0;
981 :
982 0 : if( !xBreak.is() )
983 0 : return aRet;
984 :
985 0 : OUString aWTemp( searchStr );
986 :
987 : sal_Int32 nStt, nEnd;
988 :
989 0 : Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
990 : aSrchPara.Locale,
991 0 : WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
992 :
993 0 : do
994 : {
995 0 : if( aWBnd.startPos >= endPos )
996 0 : break;
997 0 : nStt = aWBnd.startPos < startPos ? startPos : aWBnd.startPos;
998 0 : nEnd = aWBnd.endPos > endPos ? endPos : aWBnd.endPos;
999 :
1000 0 : if( nStt < nEnd &&
1001 0 : pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
1002 : {
1003 0 : aRet.subRegExpressions = 1;
1004 0 : aRet.startOffset.realloc( 1 );
1005 0 : aRet.startOffset[ 0 ] = nStt;
1006 0 : aRet.endOffset.realloc( 1 );
1007 0 : aRet.endOffset[ 0 ] = nEnd;
1008 0 : break;
1009 : }
1010 :
1011 0 : nStt = nEnd - 1;
1012 0 : aWBnd = xBreak->nextWord( aWTemp, nStt, aSrchPara.Locale,
1013 0 : WordType::ANYWORD_IGNOREWHITESPACES);
1014 0 : } while( aWBnd.startPos != aWBnd.endPos ||
1015 0 : (aWBnd.endPos != aWTemp.getLength() && aWBnd.endPos != nEnd) );
1016 : // #i50244# aWBnd.endPos != nEnd : in case there is _no_ word (only
1017 : // whitespace) in searchStr, getWordBoundary() returned startPos,startPos
1018 : // and nextWord() does also => don't loop forever.
1019 0 : return aRet;
1020 : }
1021 :
1022 0 : SearchResult TextSearch::ApproxSrchBkwrd( const OUString& searchStr,
1023 : sal_Int32 startPos, sal_Int32 endPos )
1024 : throw(RuntimeException)
1025 : {
1026 0 : SearchResult aRet;
1027 0 : aRet.subRegExpressions = 0;
1028 :
1029 0 : if( !xBreak.is() )
1030 0 : return aRet;
1031 :
1032 0 : OUString aWTemp( searchStr );
1033 :
1034 : sal_Int32 nStt, nEnd;
1035 :
1036 0 : Boundary aWBnd = xBreak->getWordBoundary( aWTemp, startPos,
1037 : aSrchPara.Locale,
1038 0 : WordType::ANYWORD_IGNOREWHITESPACES, sal_True );
1039 :
1040 0 : do
1041 : {
1042 0 : if( aWBnd.endPos <= endPos )
1043 0 : break;
1044 0 : nStt = aWBnd.startPos < endPos ? endPos : aWBnd.startPos;
1045 0 : nEnd = aWBnd.endPos > startPos ? startPos : aWBnd.endPos;
1046 :
1047 0 : if( nStt < nEnd &&
1048 0 : pWLD->WLD( aWTemp.getStr() + nStt, nEnd - nStt ) <= nLimit )
1049 : {
1050 0 : aRet.subRegExpressions = 1;
1051 0 : aRet.startOffset.realloc( 1 );
1052 0 : aRet.startOffset[ 0 ] = nEnd;
1053 0 : aRet.endOffset.realloc( 1 );
1054 0 : aRet.endOffset[ 0 ] = nStt;
1055 0 : break;
1056 : }
1057 0 : if( !nStt )
1058 0 : break;
1059 :
1060 0 : aWBnd = xBreak->previousWord( aWTemp, nStt, aSrchPara.Locale,
1061 0 : WordType::ANYWORD_IGNOREWHITESPACES);
1062 0 : } while( aWBnd.startPos != aWBnd.endPos || aWBnd.endPos != aWTemp.getLength() );
1063 0 : return aRet;
1064 : }
1065 :
1066 :
1067 : static const sal_Char cSearchImpl[] = "com.sun.star.util.TextSearch_i18n";
1068 :
1069 15 : static OUString getServiceName_Static()
1070 : {
1071 15 : return OUString( "com.sun.star.util.TextSearch" );
1072 : }
1073 :
1074 15 : static OUString getImplementationName_Static()
1075 : {
1076 15 : return OUString( cSearchImpl );
1077 : }
1078 :
1079 : OUString SAL_CALL
1080 1 : TextSearch::getImplementationName()
1081 : throw( RuntimeException, std::exception )
1082 : {
1083 1 : return getImplementationName_Static();
1084 : }
1085 :
1086 0 : sal_Bool SAL_CALL TextSearch::supportsService(const OUString& rServiceName)
1087 : throw( RuntimeException, std::exception )
1088 : {
1089 0 : return cppu::supportsService(this, rServiceName);
1090 : }
1091 :
1092 : Sequence< OUString > SAL_CALL
1093 1 : TextSearch::getSupportedServiceNames() throw( RuntimeException, std::exception )
1094 : {
1095 1 : Sequence< OUString > aRet(1);
1096 1 : aRet[0] = getServiceName_Static();
1097 1 : return aRet;
1098 : }
1099 :
1100 : ::com::sun::star::uno::Reference< ::com::sun::star::uno::XInterface >
1101 74 : SAL_CALL TextSearch_CreateInstance(
1102 : const ::com::sun::star::uno::Reference<
1103 : ::com::sun::star::lang::XMultiServiceFactory >& rxMSF )
1104 : {
1105 : return ::com::sun::star::uno::Reference<
1106 : ::com::sun::star::uno::XInterface >(
1107 : static_cast<cppu::OWeakObject*>(new TextSearch(
1108 74 : comphelper::getComponentContext( rxMSF ) )) );
1109 : }
1110 :
1111 : extern "C"
1112 : {
1113 : SAL_DLLPUBLIC_EXPORT void* SAL_CALL
1114 14 : i18nsearch_component_getFactory( const sal_Char* sImplementationName,
1115 : void* _pServiceManager,
1116 : SAL_UNUSED_PARAMETER void* )
1117 : {
1118 14 : void* pRet = NULL;
1119 :
1120 : ::com::sun::star::lang::XMultiServiceFactory* pServiceManager =
1121 : static_cast< ::com::sun::star::lang::XMultiServiceFactory* >
1122 14 : ( _pServiceManager );
1123 : ::com::sun::star::uno::Reference<
1124 14 : ::com::sun::star::lang::XSingleServiceFactory > xFactory;
1125 :
1126 14 : if ( 0 == rtl_str_compare( sImplementationName, cSearchImpl) )
1127 : {
1128 14 : ::com::sun::star::uno::Sequence< OUString > aServiceNames(1);
1129 14 : aServiceNames[0] = getServiceName_Static();
1130 28 : xFactory = ::cppu::createSingleFactory(
1131 : pServiceManager, getImplementationName_Static(),
1132 28 : &TextSearch_CreateInstance, aServiceNames );
1133 : }
1134 :
1135 14 : if ( xFactory.is() )
1136 : {
1137 14 : xFactory->acquire();
1138 14 : pRet = xFactory.get();
1139 : }
1140 :
1141 14 : return pRet;
1142 : }
1143 :
1144 : } // extern "C"
1145 :
1146 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|