LCOV - code coverage report
Current view: top level - i18npool/source/breakiterator - breakiteratorImpl.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 231 296 78.0 %
Date: 2015-06-13 12:38:46 Functions: 32 37 86.5 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <breakiteratorImpl.hxx>
      21             : #include <cppuhelper/supportsservice.hxx>
      22             : #include <unicode/uchar.h>
      23             : #include <i18nutil/unicode.hxx>
      24             : #include <rtl/ustrbuf.hxx>
      25             : 
      26             : using namespace ::com::sun::star::uno;
      27             : using namespace ::com::sun::star::lang;
      28             : 
      29             : namespace com { namespace sun { namespace star { namespace i18n {
      30             : 
      31       26608 : BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
      32             : {
      33       26608 : }
      34             : 
      35        5086 : BreakIteratorImpl::BreakIteratorImpl()
      36             : {
      37        5086 : }
      38             : 
      39       85424 : BreakIteratorImpl::~BreakIteratorImpl()
      40             : {
      41             :     // Clear lookuptable
      42       40392 :     for (size_t l = 0; l < lookupTable.size(); l++)
      43       10230 :         delete lookupTable[l];
      44       30162 :     lookupTable.clear();
      45       55262 : }
      46             : 
      47             : #define LBI getLocaleSpecificBreakIterator(rLocale)
      48             : 
      49     8604646 : sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
      50             :         const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
      51             :         throw(RuntimeException, std::exception)
      52             : {
      53     8604646 :     if (nCount < 0) throw RuntimeException();
      54             : 
      55     8604646 :     return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
      56             : }
      57             : 
      58       98900 : sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
      59             :         const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
      60             :         throw(RuntimeException, std::exception)
      61             : {
      62       98900 :     if (nCount < 0) throw RuntimeException();
      63             : 
      64       98900 :     return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
      65             : }
      66             : 
      67             : #define isZWSP(c) (ch == 0x200B)
      68             : 
      69   268525540 : static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, bool bDirection)
      70             : {
      71   268525540 :     sal_uInt32 ch=0;
      72   268525540 :     sal_Int32 pos=nPos;
      73   268525540 :     switch (rWordType) {
      74             :         case WordType::ANYWORD_IGNOREWHITESPACES:
      75        1055 :             if (bDirection)
      76         530 :                 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
      77             :             else
      78         525 :                 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
      79        1055 :             break;
      80             :         case WordType::DICTIONARY_WORD:
      81   267963207 :             if (bDirection)
      82   342160102 :                 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) ||
      83   177038910 :                             ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
      84             :             else
      85   381558694 :                 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
      86   156161502 :                             ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
      87   267963207 :             break;
      88             :         case WordType::WORD_COUNT:
      89      516836 :             if (bDirection)
      90      258418 :                 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
      91             :             else
      92      258418 :                 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
      93      516836 :             break;
      94             :     }
      95   268525540 :     return nPos;
      96             : }
      97             : 
      98        2862 : Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
      99             :         const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
     100             : {
     101        2862 :     sal_Int32 len = Text.getLength();
     102        2862 :     if( nStartPos < 0 || len == 0 )
     103          61 :         result.endPos = result.startPos = 0;
     104        2801 :     else if (nStartPos >= len)
     105           2 :         result.endPos = result.startPos = len;
     106             :     else {
     107        2799 :         result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
     108             : 
     109        2799 :         nStartPos = skipSpace(Text, result.startPos, len, rWordType, true);
     110             : 
     111        2799 :         if ( nStartPos != result.startPos) {
     112          54 :             if( nStartPos >= len )
     113           2 :                 result.startPos = result.endPos = len;
     114             :             else {
     115          52 :                 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True);
     116             :                 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
     117          52 :                 if (result.startPos < nStartPos) result.startPos = nStartPos;
     118             :             }
     119             :         }
     120             :     }
     121        2862 :     return result;
     122             : }
     123             : 
     124           2 : static inline bool SAL_CALL isCJK( const Locale& rLocale ) {
     125           2 :         return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
     126             : }
     127             : 
     128         429 : Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
     129             :         const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException, std::exception)
     130             : {
     131         429 :     sal_Int32 len = Text.getLength();
     132         429 :     if( nStartPos <= 0 || len == 0 ) {
     133           0 :         result.endPos = result.startPos = 0;
     134           0 :         return result;
     135         429 :     } else if (nStartPos > len) {
     136           0 :         result.endPos = result.startPos = len;
     137           0 :         return result;
     138             :     }
     139             : 
     140         429 :     sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, false);
     141             : 
     142             :     // if some spaces are skipped, and the script type is Asian with no CJK rLocale, we have to return
     143             :     // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
     144         429 :     result.startPos = nPos;
     145         429 :     if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
     146           0 :         result.endPos = -1;
     147           0 :         return result;
     148             :     }
     149             : 
     150         429 :     return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
     151             : }
     152             : 
     153             : 
     154   134305294 : Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
     155             :         sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException, std::exception)
     156             : {
     157   134305294 :     sal_Int32 len = Text.getLength();
     158   134305294 :     if( nPos < 0 || len == 0 )
     159       44236 :         result.endPos = result.startPos = 0;
     160   134261058 :     else if (nPos > len)
     161           0 :         result.endPos = result.startPos = len;
     162             :     else {
     163             :         sal_Int32 next, prev;
     164   134261058 :         next = skipSpace(Text, nPos, len, rWordType, true);
     165   134261058 :         prev = skipSpace(Text, nPos, len, rWordType, false);
     166   134261058 :         if (prev == 0 && next == len) {
     167         387 :             result.endPos = result.startPos = nPos;
     168   134260671 :         } else if (prev == 0 && ! bDirection) {
     169           0 :             result.endPos = result.startPos = 0;
     170   134260671 :         } else if (next == len && bDirection) {
     171       20053 :             result.endPos = result.startPos = len;
     172             :         } else {
     173   134240618 :             if (next != prev) {
     174    93927745 :                 if (next == nPos && next != len)
     175    56832543 :                     bDirection = sal_True;
     176    37095202 :                 else if (prev == nPos && prev != 0)
     177    36707572 :                     bDirection = sal_False;
     178             :                 else
     179      387630 :                     nPos = bDirection ? next : prev;
     180             :             }
     181   134240618 :             result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
     182             :         }
     183             :     }
     184   134305294 :     return result;
     185             : }
     186             : 
     187          98 : sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
     188             :         const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
     189             : {
     190          98 :     sal_Int32 len = Text.getLength();
     191             : 
     192          98 :     if (nPos < 0 || nPos >= len) return sal_False;
     193             : 
     194          98 :     sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, true);
     195             : 
     196          98 :     if (tmp != nPos) return sal_False;
     197             : 
     198          93 :     result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True);
     199             : 
     200          93 :     return result.startPos == nPos;
     201             : }
     202             : 
     203         100 : sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
     204             :         const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
     205             : {
     206         100 :     sal_Int32 len = Text.getLength();
     207             : 
     208         100 :     if (nPos <= 0 || nPos > len) return sal_False;
     209             : 
     210          98 :     sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, false);
     211             : 
     212          98 :     if (tmp != nPos) return sal_False;
     213             : 
     214          94 :     result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False);
     215             : 
     216          94 :     return result.endPos == nPos;
     217             : }
     218             : 
     219           8 : sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
     220             :         const Locale &rLocale ) throw(RuntimeException, std::exception)
     221             : {
     222           8 :     if (nStartPos < 0 || nStartPos > Text.getLength())
     223           0 :         return -1;
     224           8 :     if (Text.isEmpty()) return 0;
     225           8 :     return LBI->beginOfSentence(Text, nStartPos, rLocale);
     226             : }
     227             : 
     228         689 : sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
     229             :         const Locale &rLocale ) throw(RuntimeException, std::exception)
     230             : {
     231         689 :     if (nStartPos < 0 || nStartPos > Text.getLength())
     232           0 :         return -1;
     233         689 :     if (Text.isEmpty()) return 0;
     234         689 :     return LBI->endOfSentence(Text, nStartPos, rLocale);
     235             : }
     236             : 
     237       42527 : LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
     238             :         const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
     239             :         const LineBreakUserOptions& bOptions ) throw(RuntimeException, std::exception)
     240             : {
     241       42527 :     return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
     242             : }
     243             : 
     244   316312301 : sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
     245             :         throw(RuntimeException, std::exception)
     246             : {
     247   632624528 :     return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
     248   632590444 :         getScriptClass(Text.iterateCodePoints(&nPos, 0));
     249             : }
     250             : 
     251             : 
     252             : /** Increments/decrements position first, then obtains character.
     253             :     @return current position, may be -1 or text length if string was consumed.
     254             :  */
     255   817652214 : static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
     256   817652214 :         sal_Int32 nLen = Text.getLength();
     257   817652214 :         if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
     258    85729559 :             ch = 0;
     259    85729559 :             nStartPos = nStartPos + inc < 0 ? -1 : nLen;
     260             :         } else {
     261   731922655 :             ch = Text.iterateCodePoints(&nStartPos, inc);
     262             :             // Fix for #i80436#.
     263             :             // erAck: 2009-06-30T21:52+0200  This logic looks somewhat
     264             :             // suspicious as if it cures a symptom.. anyway, had to add
     265             :             // nStartPos < Text.getLength() to silence the (correct) assertion
     266             :             // in rtl_uString_iterateCodePoints() if Text was one character
     267             :             // (codepoint) only, made up of a surrogate pair.
     268             :             //if (inc > 0 && nStartPos < Text.getLength())
     269             :             //    ch = Text.iterateCodePoints(&nStartPos, 0);
     270             :             // With surrogates, nStartPos may actually point behind string
     271             :             // now, even if inc is only +1
     272   731922655 :             if (inc > 0)
     273   695096438 :                 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
     274             :         }
     275   817652214 :         return nStartPos;
     276             : }
     277             : 
     278             : 
     279    36824427 : sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
     280             :         sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     281             : {
     282    36824427 :     if (nStartPos < 0 || nStartPos >= Text.getLength())
     283           2 :         return -1;
     284             : 
     285    36824425 :     if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
     286           0 :         return -1;
     287             : 
     288    36824425 :     if (nStartPos == 0) return 0;
     289    36824425 :     sal_uInt32 ch=0;
     290    73650642 :     while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
     291        2618 :         if (nStartPos == 0) return 0;
     292             :     }
     293             : 
     294    36823599 :     return  iterateCodePoints(Text, nStartPos, 1, ch);
     295             : }
     296             : 
     297    86031563 : sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
     298             :         sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     299             : {
     300    86031563 :     if (nStartPos < 0 || nStartPos >= Text.getLength())
     301       11552 :         return -1;
     302             : 
     303    86020011 :     if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
     304           0 :         return -1;
     305             : 
     306    86020011 :     sal_Int32 strLen = Text.getLength();
     307    86020011 :     sal_uInt32 ch=0;
     308   830004651 :     while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
     309   658256145 :         sal_Int16 currentCharScriptType = getScriptClass(ch);
     310   658256145 :         if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
     311      291516 :             break;
     312             :     }
     313    86020011 :     return  nStartPos;
     314             : }
     315             : 
     316           0 : sal_Int32  SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
     317             :         sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     318             : {
     319           0 :     if (nStartPos < 0)
     320           0 :         return -1;
     321           0 :     if (nStartPos > Text.getLength())
     322           0 :         nStartPos = Text.getLength();
     323             : 
     324           0 :     sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
     325             : 
     326           0 :     sal_uInt32 ch=0;
     327           0 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
     328           0 :         if ((((numberOfChange % 2) == 0) != (ScriptType != getScriptClass(ch))))
     329           0 :             numberOfChange--;
     330           0 :         else if (nStartPos == 0) {
     331           0 :             return -1;
     332             :         }
     333             :     }
     334           0 :     return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
     335             : }
     336             : 
     337           0 : sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
     338             :         sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     339             : 
     340             : {
     341           0 :     if (nStartPos < 0)
     342           0 :         nStartPos = 0;
     343           0 :     sal_Int32 strLen = Text.getLength();
     344           0 :     if (nStartPos >= strLen)
     345           0 :         return -1;
     346             : 
     347           0 :     sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
     348             : 
     349           0 :     sal_uInt32 ch=0;
     350           0 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
     351           0 :         sal_Int16 currentCharScriptType = getScriptClass(ch);
     352           0 :         if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
     353           0 :                 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
     354           0 :             numberOfChange--;
     355             :     }
     356           0 :     return numberOfChange == 0 ? nStartPos : -1;
     357             : }
     358             : 
     359           0 : sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
     360             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     361             : {
     362           0 :     if (CharType == CharType::ANY_CHAR) return 0;
     363           0 :     if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
     364           0 :     if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1;
     365             : 
     366           0 :     sal_Int32 nPos=nStartPos;
     367           0 :     while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; }
     368           0 :     return nStartPos; // begin of char block is inclusive
     369             : }
     370             : 
     371        1640 : sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
     372             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     373             : {
     374        1640 :     sal_Int32 strLen = Text.getLength();
     375             : 
     376        1640 :     if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
     377        1640 :     if (nStartPos < 0 || nStartPos >= strLen) return -1;
     378        1557 :     if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1;
     379             : 
     380        1187 :     sal_uInt32 ch=0;
     381        1187 :     while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {}
     382        1187 :     return nStartPos; // end of char block is exclusive
     383             : }
     384             : 
     385        1885 : sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
     386             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     387             : {
     388        1885 :     if (CharType == CharType::ANY_CHAR) return -1;
     389        1885 :     if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
     390             : 
     391        1854 :     sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
     392        1854 :     sal_Int32 strLen = Text.getLength();
     393             : 
     394        1854 :     sal_uInt32 ch=0;
     395       15409 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
     396       11701 :         if ((CharType != (sal_Int16)u_charType(ch)) != (numberOfChange == 1))
     397         833 :             numberOfChange--;
     398             :     }
     399        1854 :     return numberOfChange == 0 ? nStartPos : -1;
     400             : }
     401             : 
     402           0 : sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
     403             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     404             : {
     405           0 :     if(CharType == CharType::ANY_CHAR) return -1;
     406           0 :     if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
     407             : 
     408           0 :     sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
     409             : 
     410           0 :     sal_uInt32 ch=0;
     411           0 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
     412           0 :         if (((numberOfChange % 2) == 0) != (CharType != (sal_Int16)u_charType(ch)))
     413           0 :             numberOfChange--;
     414           0 :         if (nStartPos == 0 && numberOfChange > 0) {
     415           0 :             numberOfChange--;
     416           0 :             if (numberOfChange == 0) return nStartPos;
     417             :         }
     418             :     }
     419           0 :     return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
     420             : }
     421             : 
     422             : 
     423             : 
     424       12606 : sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
     425             :         sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception)
     426             : {
     427       12606 :     return 0;
     428             : }
     429             : 
     430             : namespace
     431             : {
     432      284417 : sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
     433             : {
     434      284417 :     int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
     435      284417 :     return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
     436             : }
     437             : 
     438             : struct UBlock2Script
     439             : {
     440             :     UBlockCode from;
     441             :     UBlockCode to;
     442             :     sal_Int16 script;
     443             : };
     444             : 
     445             : static const UBlock2Script scriptList[] =
     446             : {
     447             :     {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
     448             :     {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
     449             :     {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
     450             :     {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
     451             :     {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
     452             :     {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
     453             :     {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
     454             :     {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
     455             :     {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
     456             :     {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
     457             :     {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
     458             :     {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
     459             :     {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
     460             :     {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
     461             :     {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
     462             :     {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
     463             :     {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
     464             :     {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
     465             :     {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
     466             : };
     467             : 
     468             : #define scriptListCount SAL_N_ELEMENTS(scriptList)
     469             : 
     470             : //always sets rScriptType
     471             : 
     472             : //Classifies currentChar by its Unicode block and writes the result to rScriptType.
     473             : //Returns true for characters historically explicitly assigned to
     474             : //latin/weak/asian (one of the special cases below, or a block range found in scriptList).
     475             : //Returns false for characters that were historically only implicitly assigned to
     476             : //weak as unknown; rScriptType is still set to WEAK in that case.
     477   786290303 : bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
     478             : {
     479   786290303 :     bool bKnown = true; // cleared only when no scriptList entry covers the block
     480             :     //handle specific characters always as weak:
     481             :     //  0x01 - this breaks a word
     482             :     //  0x02 - this can be inside a word
     483             :     //  0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
     484   786290303 :     if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
     485    43325775 :         rScriptType = ScriptType::WEAK;
     486             :     // workaround for Coptic: code points 0x2C80..0x2CE3 are forced to LATIN
     487   742964528 :     else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
     488           0 :         rScriptType = ScriptType::LATIN;
     489             :     else
     490             :     {
     491   742964528 :         UBlockCode block=ublock_getCode(currentChar); // ICU block code of the character
     492   742964528 :         size_t i = 0;
     493  2232865413 :         while (i < scriptListCount) // stop at the first entry whose upper bound is >= block
     494             :         {
     495  1489900885 :             if (block <= scriptList[i].to)
     496   742964528 :                 break;
     497   746936357 :             ++i; // NOTE(review): first-match scan presumes scriptList is ordered by ascending block - confirm
     498             :         }
     499   742964528 :         if (i < scriptListCount && block >= scriptList[i].from) // block lies inside entry i's [from, to] range
     500   742680111 :             rScriptType = scriptList[i].script;
     501             :         else
     502             :         {
     503      284417 :             rScriptType = ScriptType::WEAK; // block not covered by any entry: default to WEAK
     504      284417 :             bKnown = false; // tell the caller this was only a default
     505             :         }
     506             :     }
     507   786290303 :     return bKnown;
     508             : }
     509             : }
     510             : // Returns the script type of currentChar: block table first, getScriptClassByUAX24Script as fallback.
     511  1134204943 : sal_Int16  BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
     512             : {
     513             :     static sal_uInt32 lastChar = 0; // one-entry cache: the character classified last
     514             :     static sal_Int16 nRet = 0; // one-entry cache: the result computed for lastChar
     515             :     // NOTE(review): a first call with currentChar == 0 equals the initial lastChar and returns the initial nRet (0) unclassified
     516  1134204943 :     if (currentChar != lastChar)
     517             :     {
     518   786290303 :         lastChar = currentChar;
     519             :         // a false return means the block table only supplied a default, so ask the UAX24 script lookup instead
     520   786290303 :         if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
     521      284417 :             nRet = getScriptClassByUAX24Script(currentChar);
     522             :     }
     523             :     // NOTE(review): the statics above are read and written with no locking visible here - confirm threading expectations
     524  1134204943 :     return nRet;
     525             : }
     526             : // Field-wise Locale equality: Language, Country and Variant must all compare equal.
     527   144283806 : static inline bool operator == (const Locale& l1, const Locale& l2) {
     528   144283806 :         return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant;
     529             : }
     530             : // Sets xBI to the service "com.sun.star.i18n.BreakIterator_" + aLocaleName (cached or newly created); returns false if unavailable.
     531       14383 : bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException )
     532             : {
     533             :     // to share service between same Language but different Country code, like zh_CN and zh_TW:
     534       15047 :     for (size_t l = 0; l < lookupTable.size(); l++) { // reuse a cached entry whose Language field equals aLocaleName
     535         788 :         lookupTableItem *listItem = lookupTable[l];
     536         788 :         if (aLocaleName == listItem->aLocale.Language) {
     537         124 :             xBI = listItem->xBI;
     538         124 :             return true;
     539             :         }
     540             :     }
     541             :     // nothing cached under that name: ask the context's service manager to instantiate the service
     542       28518 :     Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
     543       14259 :             "com.sun.star.i18n.BreakIterator_" + aLocaleName, m_xContext);
     544             :     // on success the new service is cached with all three Locale fields set to aLocaleName
     545       14259 :     if ( xI.is() ) {
     546        5086 :         xBI.set(xI, UNO_QUERY);
     547        5086 :         if (xBI.is()) {
     548        5086 :             lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI)); // NOTE(review): raw new; where the item is freed is not visible here
     549        5086 :             return true;
     550             :         }
     551             :     }
     552        9173 :     return false; // no instance was created, or the query into xBI came back empty
     553             : }
     554             : // Returns the break iterator for rLocale, caching results in lookupTable; throws RuntimeException if none can be obtained.
     555             : Reference < XBreakIterator > SAL_CALL
     556   142990668 : BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException)
     557             : {
     558   142990668 :     if (xBI.is() && rLocale == aLocale) // fast path: same locale as the one remembered in aLocale
     559   142468461 :         return xBI;
     560      522207 :     else if (m_xContext.is()) {
     561      522207 :         aLocale = rLocale;
     562             :         // NOTE(review): aLocale changes before a service is found; if every attempt below fails without touching xBI, a later call with this locale would hit the fast path with the old xBI - confirm
     563     1303430 :         for (size_t i = 0; i < lookupTable.size(); i++) { // exact-locale lookup in the cache
     564     1298220 :             lookupTableItem *listItem = lookupTable[i];
     565     1298220 :             if (rLocale == listItem->aLocale)
     566     1039204 :                 return xBI = listItem->xBI;
     567             :         }
     568             : 
     569        5210 :         sal_Unicode under = (sal_Unicode)'_';
     570             : 
     571        5210 :         sal_Int32 l = rLocale.Language.getLength();
     572        5210 :         sal_Int32 c = rLocale.Country.getLength();
     573        5210 :         sal_Int32 v = rLocale.Variant.getLength();
     574        5210 :         OUStringBuffer aBuf(l+c+v+3);
     575             :         // try service names from most to least specific; || stops at the first attempt that succeeds
     576       20813 :         if ((l > 0 && c > 0 && v > 0 &&
     577             :                     // load service with name <base>_<lang>_<country>_<variant>
     578           0 :                     createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
     579       10420 :                             rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
     580       15517 :                 (l > 0 && c > 0 &&
     581             :                  // load service with name <base>_<lang>_<country>
     582        5167 :                  createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
     583       25380 :                          rLocale.Country).makeStringAndClear())) ||
     584        4702 :                 (l > 0 && c > 0 && rLocale.Language == "zh" &&
     585         120 :                  (rLocale.Country == "HK" ||
     586          60 :                   rLocale.Country == "MO" ) &&
     587             :                  // if the country code is HK or MO, one more step to try TW.
     588           0 :                  createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii(
     589        9879 :                          "TW").makeStringAndClear())) ||
     590        4642 :                 (l > 0 &&
     591             :                  // load service with name <base>_<lang>
     592       19000 :                  createLocaleSpecificBreakIterator(rLocale.Language)) ||
     593             :                 // load default service with name <base>_Unicode
     594       14358 :                 createLocaleSpecificBreakIterator(OUString("Unicode"))) {
     595        5210 :             lookupTable.push_back( new lookupTableItem(aLocale, xBI) ); // also cache under the exact requested locale
     596        5210 :             return xBI;
     597           0 :         }
     598             :     }
     599           0 :     throw RuntimeException(); // no component context, or none of the attempts above succeeded
     600             : }
     601             : // Returns the fixed implementation name string of this class.
     602             : OUString SAL_CALL
     603           1 : BreakIteratorImpl::getImplementationName() throw( RuntimeException, std::exception )
     604             : {
     605           1 :     return OUString("com.sun.star.i18n.BreakIterator");
     606             : }
     607             : // Forwards the query to cppu::supportsService, passing this object and rServiceName.
     608             : sal_Bool SAL_CALL
     609           0 : BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
     610             : {
     611           0 :     return cppu::supportsService(this, rServiceName);
     612             : }
     613             : // Returns a one-element sequence holding the single service name of this class.
     614             : Sequence< OUString > SAL_CALL
     615           1 : BreakIteratorImpl::getSupportedServiceNames() throw( RuntimeException, std::exception )
     616             : {
     617           1 :     Sequence< OUString > aRet(1);
     618           1 :     aRet[0] = "com.sun.star.i18n.BreakIterator";
     619           1 :     return aRet;
     620             : }
     621             : 
     622             : } } } }
     623             : // Exported C entry point: constructs a BreakIteratorImpl for context and returns the result of cppu::acquire on it.
     624             : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
     625       26602 : com_sun_star_i18n_BreakIterator_get_implementation(
     626             :     css::uno::XComponentContext *context,
     627             :     css::uno::Sequence<css::uno::Any> const &) // unnamed argument sequence is not used
     628             : {
     629       26602 :     return cppu::acquire(new com::sun::star::i18n::BreakIteratorImpl(context));
     630             : }
     631             : 
     632             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11