LCOV - code coverage report
Current view: top level - i18npool/source/breakiterator - breakiteratorImpl.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 223 296 75.3 %
Date: 2014-04-11 Functions: 30 37 81.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <breakiteratorImpl.hxx>
      21             : #include <cppuhelper/supportsservice.hxx>
      22             : #include <unicode/uchar.h>
      23             : #include <i18nutil/unicode.hxx>
      24             : #include <rtl/ustrbuf.hxx>
      25             : 
      26             : using namespace ::com::sun::star::uno;
      27             : using namespace ::com::sun::star::lang;
      28             : using namespace ::rtl;
      29             : 
      30             : namespace com { namespace sun { namespace star { namespace i18n {
      31             : 
      32       16520 : BreakIteratorImpl::BreakIteratorImpl( const Reference < XComponentContext >& rxContext ) : m_xContext( rxContext )
      33             : {
      34       16520 : }
      35             : 
      36        2591 : BreakIteratorImpl::BreakIteratorImpl()
      37             : {
      38        2591 : }
      39             : 
      40       49857 : BreakIteratorImpl::~BreakIteratorImpl()
      41             : {
      42             :     // Clear lookuptable
      43       22394 :     for (size_t l = 0; l < lookupTable.size(); l++)
      44        4958 :         delete lookupTable[l];
      45       17436 :     lookupTable.clear();
      46       32421 : }
      47             : 
      48             : #define LBI getLocaleSpecificBreakIterator(rLocale)
      49             : 
      50      244761 : sal_Int32 SAL_CALL BreakIteratorImpl::nextCharacters( const OUString& Text, sal_Int32 nStartPos,
      51             :         const Locale &rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
      52             :         throw(RuntimeException, std::exception)
      53             : {
      54      244761 :     if (nCount < 0) throw RuntimeException();
      55             : 
      56      244761 :     return LBI->nextCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
      57             : }
      58             : 
      59       36430 : sal_Int32 SAL_CALL BreakIteratorImpl::previousCharacters( const OUString& Text, sal_Int32 nStartPos,
      60             :         const Locale& rLocale, sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
      61             :         throw(RuntimeException, std::exception)
      62             : {
      63       36430 :     if (nCount < 0) throw RuntimeException();
      64             : 
      65       36430 :     return LBI->previousCharacters( Text, nStartPos, rLocale, nCharacterIteratorMode, nCount, nDone);
      66             : }
      67             : 
      68             : #define isZWSP(c) (ch == 0x200B)
      69             : 
      70      194680 : static sal_Int32 skipSpace(const OUString& Text, sal_Int32 nPos, sal_Int32 len, sal_Int16 rWordType, sal_Bool bDirection)
      71             : {
      72      194680 :     sal_uInt32 ch=0;
      73      194680 :     sal_Int32 pos=nPos;
      74      194680 :     switch (rWordType) {
      75             :         case WordType::ANYWORD_IGNOREWHITESPACES:
      76        1049 :             if (bDirection)
      77         527 :                 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
      78             :             else
      79         522 :                 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
      80        1049 :             break;
      81             :         case WordType::DICTIONARY_WORD:
      82      143623 :             if (bDirection)
      83      150530 :                 while (nPos < len && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch) ||
      84       75139 :                             ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
      85             :             else
      86      181218 :                 while (nPos > 0 && (u_isWhitespace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch) ||
      87       74133 :                             ! (ch == 0x002E || u_isalnum(ch)))) nPos=pos;
      88      143623 :             break;
      89             :         case WordType::WORD_COUNT:
      90       16908 :             if (bDirection)
      91        8454 :                 while (nPos < len && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, 1)) || isZWSP(ch))) nPos=pos;
      92             :             else
      93        8454 :                 while (nPos > 0 && (u_isUWhiteSpace(ch = Text.iterateCodePoints(&pos, -1)) || isZWSP(ch))) nPos=pos;
      94       16908 :             break;
      95             :     }
      96      194680 :     return nPos;
      97             : }
      98             : 
      99        3267 : Boundary SAL_CALL BreakIteratorImpl::nextWord( const OUString& Text, sal_Int32 nStartPos,
     100             :         const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
     101             : {
     102        3267 :     sal_Int32 len = Text.getLength();
     103        3267 :     if( nStartPos < 0 || len == 0 )
     104          61 :         result.endPos = result.startPos = 0;
     105        3206 :     else if (nStartPos >= len)
     106           5 :         result.endPos = result.startPos = len;
     107             :     else {
     108        3201 :         result = LBI->nextWord(Text, nStartPos, rLocale, rWordType);
     109             : 
     110        3201 :         nStartPos = skipSpace(Text, result.startPos, len, rWordType, sal_True);
     111             : 
     112        3201 :         if ( nStartPos != result.startPos) {
     113          18 :             if( nStartPos >= len )
     114           0 :                 result.startPos = result.endPos = len;
     115             :             else {
     116          18 :                 result = LBI->getWordBoundary(Text, nStartPos, rLocale, rWordType, sal_True);
     117             :                 // i88041: avoid startPos goes back to nStartPos when switching between Latin and CJK scripts
     118          18 :                 if (result.startPos < nStartPos) result.startPos = nStartPos;
     119             :             }
     120             :         }
     121             :     }
     122        3267 :     return result;
     123             : }
     124             : 
     125           3 : static inline sal_Bool SAL_CALL isCJK( const Locale& rLocale ) {
     126           3 :         return rLocale.Language == "zh" || rLocale.Language == "ja" || rLocale.Language == "ko";
     127             : }
     128             : 
     129         431 : Boundary SAL_CALL BreakIteratorImpl::previousWord( const OUString& Text, sal_Int32 nStartPos,
     130             :         const Locale& rLocale, sal_Int16 rWordType) throw(RuntimeException, std::exception)
     131             : {
     132         431 :     sal_Int32 len = Text.getLength();
     133         431 :     if( nStartPos <= 0 || len == 0 ) {
     134           0 :         result.endPos = result.startPos = 0;
     135           0 :         return result;
     136         431 :     } else if (nStartPos > len) {
     137           0 :         result.endPos = result.startPos = len;
     138           0 :         return result;
     139             :     }
     140             : 
     141         431 :     sal_Int32 nPos = skipSpace(Text, nStartPos, len, rWordType, sal_False);
     142             : 
     143             :     // if some spaces are skiped, and the script type is Asian with no CJK rLocale, we have to return
     144             :     // (nStartPos, -1) for caller to send correct rLocale for loading correct dictionary.
     145         431 :     result.startPos = nPos;
     146         431 :     if (nPos != nStartPos && nPos > 0 && !isCJK(rLocale) && getScriptClass(Text.iterateCodePoints(&nPos, -1)) == ScriptType::ASIAN) {
     147           0 :         result.endPos = -1;
     148           0 :         return result;
     149             :     }
     150             : 
     151         431 :     return LBI->previousWord(Text, result.startPos, rLocale, rWordType);
     152             : }
     153             : 
     154             : 
     155      121950 : Boundary SAL_CALL BreakIteratorImpl::getWordBoundary( const OUString& Text, sal_Int32 nPos, const Locale& rLocale,
     156             :         sal_Int16 rWordType, sal_Bool bDirection ) throw(RuntimeException, std::exception)
     157             : {
     158      121950 :     sal_Int32 len = Text.getLength();
     159      121950 :     if( nPos < 0 || len == 0 )
     160       26533 :         result.endPos = result.startPos = 0;
     161       95417 :     else if (nPos > len)
     162           0 :         result.endPos = result.startPos = len;
     163             :     else {
     164             :         sal_Int32 next, prev;
     165       95417 :         next = skipSpace(Text, nPos, len, rWordType, sal_True);
     166       95417 :         prev = skipSpace(Text, nPos, len, rWordType, sal_False);
     167       95417 :         if (prev == 0 && next == len) {
     168         229 :             result.endPos = result.startPos = nPos;
     169       95188 :         } else if (prev == 0 && ! bDirection) {
     170           0 :             result.endPos = result.startPos = 0;
     171       95188 :         } else if (next == len && bDirection) {
     172       14708 :             result.endPos = result.startPos = len;
     173             :         } else {
     174       80480 :             if (next != prev) {
     175       27313 :                 if (next == nPos && next != len)
     176       26396 :                     bDirection = sal_True;
     177         917 :                 else if (prev == nPos && prev != 0)
     178         238 :                     bDirection = sal_False;
     179             :                 else
     180         679 :                     nPos = bDirection ? next : prev;
     181             :             }
     182       80480 :             result = LBI->getWordBoundary(Text, nPos, rLocale, rWordType, bDirection);
     183             :         }
     184             :     }
     185      121950 :     return result;
     186             : }
     187             : 
     188         110 : sal_Bool SAL_CALL BreakIteratorImpl::isBeginWord( const OUString& Text, sal_Int32 nPos,
     189             :         const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
     190             : {
     191         110 :     sal_Int32 len = Text.getLength();
     192             : 
     193         110 :     if (nPos < 0 || nPos >= len) return sal_False;
     194             : 
     195         107 :     sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_True);
     196             : 
     197         107 :     if (tmp != nPos) return sal_False;
     198             : 
     199         100 :     result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_True);
     200             : 
     201         100 :     return result.startPos == nPos;
     202             : }
     203             : 
     204         117 : sal_Bool SAL_CALL BreakIteratorImpl::isEndWord( const OUString& Text, sal_Int32 nPos,
     205             :         const Locale& rLocale, sal_Int16 rWordType ) throw(RuntimeException, std::exception)
     206             : {
     207         117 :     sal_Int32 len = Text.getLength();
     208             : 
     209         117 :     if (nPos <= 0 || nPos > len) return sal_False;
     210             : 
     211         107 :     sal_Int32 tmp = skipSpace(Text, nPos, len, rWordType, sal_False);
     212             : 
     213         107 :     if (tmp != nPos) return sal_False;
     214             : 
     215         103 :     result = getWordBoundary(Text, nPos, rLocale, rWordType, sal_False);
     216             : 
     217         103 :     return result.endPos == nPos;
     218             : }
     219             : 
     220          17 : sal_Int32 SAL_CALL BreakIteratorImpl::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
     221             :         const Locale &rLocale ) throw(RuntimeException, std::exception)
     222             : {
     223          17 :     if (nStartPos < 0 || nStartPos > Text.getLength())
     224           0 :         return -1;
     225          17 :     if (Text.isEmpty()) return 0;
     226          17 :     return LBI->beginOfSentence(Text, nStartPos, rLocale);
     227             : }
     228             : 
     229         450 : sal_Int32 SAL_CALL BreakIteratorImpl::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
     230             :         const Locale &rLocale ) throw(RuntimeException, std::exception)
     231             : {
     232         450 :     if (nStartPos < 0 || nStartPos > Text.getLength())
     233           0 :         return -1;
     234         450 :     if (Text.isEmpty()) return 0;
     235         450 :     return LBI->endOfSentence(Text, nStartPos, rLocale);
     236             : }
     237             : 
     238       84034 : LineBreakResults SAL_CALL BreakIteratorImpl::getLineBreak( const OUString& Text, sal_Int32 nStartPos,
     239             :         const Locale& rLocale, sal_Int32 nMinBreakPos, const LineBreakHyphenationOptions& hOptions,
     240             :         const LineBreakUserOptions& bOptions ) throw(RuntimeException, std::exception)
     241             : {
     242       84034 :     return LBI->getLineBreak(Text, nStartPos, rLocale, nMinBreakPos, hOptions, bOptions);
     243             : }
     244             : 
     245      887292 : sal_Int16 SAL_CALL BreakIteratorImpl::getScriptType( const OUString& Text, sal_Int32 nPos )
     246             :         throw(RuntimeException, std::exception)
     247             : {
     248     1774530 :     return (nPos < 0 || nPos >= Text.getLength()) ? ScriptType::WEAK :
     249     1755203 :         getScriptClass(Text.iterateCodePoints(&nPos, 0));
     250             : }
     251             : 
     252             : 
     253             : /** Increments/decrements position first, then obtains character.
     254             :     @return current position, may be -1 or text length if string was consumed.
     255             :  */
     256    16775067 : static sal_Int32 SAL_CALL iterateCodePoints(const OUString& Text, sal_Int32 &nStartPos, sal_Int32 inc, sal_uInt32& ch) {
     257    16775067 :         sal_Int32 nLen = Text.getLength();
     258    16775067 :         if (nStartPos + inc < 0 || nStartPos + inc >= nLen) {
     259      794375 :             ch = 0;
     260      794375 :             nStartPos = nStartPos + inc < 0 ? -1 : nLen;
     261             :         } else {
     262    15980692 :             ch = Text.iterateCodePoints(&nStartPos, inc);
     263             :             // Fix for #i80436#.
     264             :             // erAck: 2009-06-30T21:52+0200  This logic looks somewhat
     265             :             // suspicious as if it cures a symptom.. anyway, had to add
     266             :             // nStartPos < Text.getLength() to silence the (correct) assertion
     267             :             // in rtl_uString_iterateCodePoints() if Text was one character
     268             :             // (codepoint) only, made up of a surrogate pair.
     269             :             //if (inc > 0 && nStartPos < Text.getLength())
     270             :             //    ch = Text.iterateCodePoints(&nStartPos, 0);
     271             :             // With surrogates, nStartPos may actually point behind string
     272             :             // now, even if inc is only +1
     273    15980692 :             if (inc > 0)
     274    15971113 :                 ch = (nStartPos < nLen ? Text.iterateCodePoints(&nStartPos, 0) : 0);
     275             :         }
     276    16775067 :         return nStartPos;
     277             : }
     278             : 
     279             : 
     280        8027 : sal_Int32 SAL_CALL BreakIteratorImpl::beginOfScript( const OUString& Text,
     281             :         sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     282             : {
     283        8027 :     if (nStartPos < 0 || nStartPos >= Text.getLength())
     284           0 :         return -1;
     285             : 
     286        8027 :     if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
     287           0 :         return -1;
     288             : 
     289        8027 :     if (nStartPos == 0) return 0;
     290        8027 :     sal_uInt32 ch=0;
     291       17606 :     while (iterateCodePoints(Text, nStartPos, -1, ch) >= 0 && ScriptType == getScriptClass(ch)) {
     292        2343 :         if (nStartPos == 0) return 0;
     293             :     }
     294             : 
     295        7236 :     return  iterateCodePoints(Text, nStartPos, 1, ch);
     296             : }
     297             : 
     298      272041 : sal_Int32 SAL_CALL BreakIteratorImpl::endOfScript( const OUString& Text,
     299             :         sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     300             : {
     301      272041 :     if (nStartPos < 0 || nStartPos >= Text.getLength())
     302        5822 :         return -1;
     303             : 
     304      266219 :     if(ScriptType != getScriptClass(Text.iterateCodePoints(&nStartPos, 0)))
     305           0 :         return -1;
     306             : 
     307      266219 :     sal_Int32 strLen = Text.getLength();
     308      266219 :     sal_uInt32 ch=0;
     309     5451711 :     while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen ) {
     310     4925855 :         sal_Int16 currentCharScriptType = getScriptClass(ch);
     311     4925855 :         if(ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK)
     312        6582 :             break;
     313             :     }
     314      266219 :     return  nStartPos;
     315             : }
     316             : 
     317           0 : sal_Int32  SAL_CALL BreakIteratorImpl::previousScript( const OUString& Text,
     318             :         sal_Int32 nStartPos, sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     319             : {
     320           0 :     if (nStartPos < 0)
     321           0 :         return -1;
     322           0 :     if (nStartPos > Text.getLength())
     323           0 :         nStartPos = Text.getLength();
     324             : 
     325           0 :     sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
     326             : 
     327           0 :     sal_uInt32 ch=0;
     328           0 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
     329           0 :         if ((((numberOfChange % 2) == 0) != (ScriptType != getScriptClass(ch))))
     330           0 :             numberOfChange--;
     331           0 :         else if (nStartPos == 0) {
     332           0 :             return -1;
     333             :         }
     334             :     }
     335           0 :     return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
     336             : }
     337             : 
     338           0 : sal_Int32 SAL_CALL BreakIteratorImpl::nextScript( const OUString& Text, sal_Int32 nStartPos,
     339             :         sal_Int16 ScriptType ) throw(RuntimeException, std::exception)
     340             : 
     341             : {
     342           0 :     if (nStartPos < 0)
     343           0 :         nStartPos = 0;
     344           0 :     sal_Int32 strLen = Text.getLength();
     345           0 :     if (nStartPos >= strLen)
     346           0 :         return -1;
     347             : 
     348           0 :     sal_Int16 numberOfChange = (ScriptType == getScriptClass(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
     349             : 
     350           0 :     sal_uInt32 ch=0;
     351           0 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
     352           0 :         sal_Int16 currentCharScriptType = getScriptClass(ch);
     353           0 :         if ((numberOfChange == 1) ? (ScriptType == currentCharScriptType) :
     354           0 :                 (ScriptType != currentCharScriptType && currentCharScriptType != ScriptType::WEAK))
     355           0 :             numberOfChange--;
     356             :     }
     357           0 :     return numberOfChange == 0 ? nStartPos : -1;
     358             : }
     359             : 
     360           0 : sal_Int32 SAL_CALL BreakIteratorImpl::beginOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
     361             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     362             : {
     363           0 :     if (CharType == CharType::ANY_CHAR) return 0;
     364           0 :     if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
     365           0 :     if (CharType != (sal_Int16)u_charType( Text.iterateCodePoints(&nStartPos, 0))) return -1;
     366             : 
     367           0 :     sal_Int32 nPos=nStartPos;
     368           0 :     while(nStartPos > 0 && CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nPos, -1))) { nStartPos=nPos; }
     369           0 :     return nStartPos; // begin of char block is inclusive
     370             : }
     371             : 
     372      172988 : sal_Int32 SAL_CALL BreakIteratorImpl::endOfCharBlock( const OUString& Text, sal_Int32 nStartPos,
     373             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     374             : {
     375      172988 :     sal_Int32 strLen = Text.getLength();
     376             : 
     377      172988 :     if (CharType == CharType::ANY_CHAR) return strLen; // end of char block is exclusive
     378      172988 :     if (nStartPos < 0 || nStartPos >= strLen) return -1;
     379      172941 :     if (CharType != (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) return -1;
     380             : 
     381      172893 :     sal_uInt32 ch=0;
     382      172893 :     while(iterateCodePoints(Text, nStartPos, 1, ch) < strLen && CharType == (sal_Int16)u_charType(ch)) {}
     383      172893 :     return nStartPos; // end of char block is exclusive
     384             : }
     385             : 
     386     1216069 : sal_Int32 SAL_CALL BreakIteratorImpl::nextCharBlock( const OUString& Text, sal_Int32 nStartPos,
     387             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     388             : {
     389     1216069 :     if (CharType == CharType::ANY_CHAR) return -1;
     390     1216069 :     if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
     391             : 
     392     1207374 :     sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 2 : 1;
     393     1207374 :     sal_Int32 strLen = Text.getLength();
     394             : 
     395     1207374 :     sal_uInt32 ch=0;
     396    13356074 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, 1, ch) < strLen) {
     397    10941326 :         if ((CharType != (sal_Int16)u_charType(ch)) != (numberOfChange == 1))
     398      753552 :             numberOfChange--;
     399             :     }
     400     1207374 :     return numberOfChange == 0 ? nStartPos : -1;
     401             : }
     402             : 
     403           0 : sal_Int32 SAL_CALL BreakIteratorImpl::previousCharBlock( const OUString& Text, sal_Int32 nStartPos,
     404             :         const Locale& /*rLocale*/, sal_Int16 CharType ) throw(RuntimeException, std::exception)
     405             : {
     406           0 :     if(CharType == CharType::ANY_CHAR) return -1;
     407           0 :     if (nStartPos < 0 || nStartPos >= Text.getLength()) return -1;
     408             : 
     409           0 :     sal_Int16 numberOfChange = (CharType == (sal_Int16)u_charType(Text.iterateCodePoints(&nStartPos, 0))) ? 3 : 2;
     410             : 
     411           0 :     sal_uInt32 ch=0;
     412           0 :     while (numberOfChange > 0 && iterateCodePoints(Text, nStartPos, -1, ch) >= 0) {
     413           0 :         if (((numberOfChange % 2) == 0) != (CharType != (sal_Int16)u_charType(ch)))
     414           0 :             numberOfChange--;
     415           0 :         if (nStartPos == 0 && numberOfChange > 0) {
     416           0 :             numberOfChange--;
     417           0 :             if (numberOfChange == 0) return nStartPos;
     418             :         }
     419             :     }
     420           0 :     return numberOfChange == 0 ? iterateCodePoints(Text, nStartPos, 1, ch) : -1;
     421             : }
     422             : 
     423             : 
     424             : 
     425        9089 : sal_Int16 SAL_CALL BreakIteratorImpl::getWordType( const OUString& /*Text*/,
     426             :         sal_Int32 /*nPos*/, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception)
     427             : {
     428        9089 :     return 0;
     429             : }
     430             : 
     431             : namespace
     432             : {
     433        5429 : sal_Int16 getScriptClassByUAX24Script(sal_uInt32 currentChar)
     434             : {
     435        5429 :     int32_t script = u_getIntPropertyValue(currentChar, UCHAR_SCRIPT);
     436        5429 :     return unicode::getScriptClassFromUScriptCode(static_cast<UScriptCode>(script));
     437             : }
     438             : 
     439             : struct UBlock2Script
     440             : {
     441             :     UBlockCode from;
     442             :     UBlockCode to;
     443             :     sal_Int16 script;
     444             : };
     445             : 
     446             : static const UBlock2Script scriptList[] =
     447             : {
     448             :     {UBLOCK_NO_BLOCK, UBLOCK_NO_BLOCK, ScriptType::WEAK},
     449             :     {UBLOCK_BASIC_LATIN, UBLOCK_ARMENIAN, ScriptType::LATIN},
     450             :     {UBLOCK_HEBREW, UBLOCK_MYANMAR, ScriptType::COMPLEX},
     451             :     {UBLOCK_GEORGIAN, UBLOCK_GEORGIAN, ScriptType::LATIN},
     452             :     {UBLOCK_HANGUL_JAMO, UBLOCK_HANGUL_JAMO, ScriptType::ASIAN},
     453             :     {UBLOCK_ETHIOPIC, UBLOCK_ETHIOPIC, ScriptType::COMPLEX},
     454             :     {UBLOCK_CHEROKEE, UBLOCK_RUNIC, ScriptType::LATIN},
     455             :     {UBLOCK_KHMER, UBLOCK_MONGOLIAN, ScriptType::COMPLEX},
     456             :     {UBLOCK_LATIN_EXTENDED_ADDITIONAL, UBLOCK_GREEK_EXTENDED, ScriptType::LATIN},
     457             :     {UBLOCK_NUMBER_FORMS, UBLOCK_NUMBER_FORMS, ScriptType::WEAK},
     458             :     {UBLOCK_CJK_RADICALS_SUPPLEMENT, UBLOCK_HANGUL_SYLLABLES, ScriptType::ASIAN},
     459             :     {UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS, ScriptType::ASIAN},
     460             :     {UBLOCK_ARABIC_PRESENTATION_FORMS_A, UBLOCK_ARABIC_PRESENTATION_FORMS_A, ScriptType::COMPLEX},
     461             :     {UBLOCK_CJK_COMPATIBILITY_FORMS, UBLOCK_CJK_COMPATIBILITY_FORMS, ScriptType::ASIAN},
     462             :     {UBLOCK_ARABIC_PRESENTATION_FORMS_B, UBLOCK_ARABIC_PRESENTATION_FORMS_B, ScriptType::COMPLEX},
     463             :     {UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS, ScriptType::ASIAN},
     464             :     {UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B, UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT, ScriptType::ASIAN},
     465             :     {UBLOCK_CJK_STROKES, UBLOCK_CJK_STROKES, ScriptType::ASIAN},
     466             :     {UBLOCK_LATIN_EXTENDED_C, UBLOCK_LATIN_EXTENDED_D, ScriptType::LATIN}
     467             : };
     468             : 
     469             : #define scriptListCount SAL_N_ELEMENTS(scriptList)
     470             : 
     471             : //always sets rScriptType
     472             : 
     473             : //returns true for characters historically explicitly assigned to
     474             : //latin/weak/asian
     475             : 
     476             : //returns false for characters that historically implicitly assigned to
     477             : //weak as unknown
     478     5339754 : bool getCompatibilityScriptClassByBlock(sal_uInt32 currentChar, sal_Int16 &rScriptType)
     479             : {
     480     5339754 :     bool bKnown = true;
     481             :     //handle specific characters always as weak:
     482             :     //  0x01 - this breaks a word
     483             :     //  0x02 - this can be inside a word
     484             :     //  0x20 & 0xA0 - Bug 102975, declare western space and non-break space as WEAK char.
     485     5339754 :     if( 0x01 == currentChar || 0x02 == currentChar || 0x20 == currentChar || 0xA0 == currentChar)
     486      507752 :         rScriptType = ScriptType::WEAK;
     487             :     // workaround for Coptic
     488     4832002 :     else if ( 0x2C80 <= currentChar && 0x2CE3 >= currentChar)
     489           0 :         rScriptType = ScriptType::LATIN;
     490             :     else
     491             :     {
     492     4832002 :         UBlockCode block=ublock_getCode(currentChar);
     493     4832002 :         size_t i = 0;
     494    14553921 :         while (i < scriptListCount)
     495             :         {
     496     9721919 :             if (block <= scriptList[i].to)
     497     4832002 :                 break;
     498     4889917 :             ++i;
     499             :         }
     500     4832002 :         if (i < scriptListCount && block >= scriptList[i].from)
     501     4826573 :             rScriptType = scriptList[i].script;
     502             :         else
     503             :         {
     504        5429 :             rScriptType = ScriptType::WEAK;
     505        5429 :             bKnown = false;
     506             :         }
     507             :     }
     508     5339754 :     return bKnown;
     509             : }
     510             : }
     511             : 
     512     6077594 : sal_Int16  BreakIteratorImpl::getScriptClass(sal_uInt32 currentChar)
     513             : {
     514             :     static sal_uInt32 lastChar = 0;
     515             :     static sal_Int16 nRet = 0;
     516             : 
     517     6077594 :     if (currentChar != lastChar)
     518             :     {
     519     5339754 :         lastChar = currentChar;
     520             : 
     521     5339754 :         if (!getCompatibilityScriptClassByBlock(currentChar, nRet))
     522        5429 :             nRet = getScriptClassByUAX24Script(currentChar);
     523             :     }
     524             : 
     525     6077594 :     return nRet;
     526             : }
     527             : 
     528      497673 : static inline sal_Bool operator == (const Locale& l1, const Locale& l2) {
     529      497673 :         return l1.Language == l2.Language && l1.Country == l2.Country && l1.Variant == l2.Variant;
     530             : }
     531             : 
     532        7931 : sal_Bool SAL_CALL BreakIteratorImpl::createLocaleSpecificBreakIterator(const OUString& aLocaleName) throw( RuntimeException )
     533             : {
     534             :     // to share service between same Language but different Country code, like zh_CN and zh_TW
     535        8303 :     for (size_t l = 0; l < lookupTable.size(); l++) {
     536         445 :         lookupTableItem *listItem = lookupTable[l];
     537         445 :         if (aLocaleName == listItem->aLocale.Language) {
     538          73 :             xBI = listItem->xBI;
     539          73 :             return sal_True;
     540             :         }
     541             :     }
     542             : 
     543       15716 :     Reference < uno::XInterface > xI = m_xContext->getServiceManager()->createInstanceWithContext(
     544        7858 :             OUString("com.sun.star.i18n.BreakIterator_") + aLocaleName, m_xContext);
     545             : 
     546        7858 :     if ( xI.is() ) {
     547        2591 :         xBI.set(xI, UNO_QUERY);
     548        2591 :         if (xBI.is()) {
     549        2591 :             lookupTable.push_back(new lookupTableItem(Locale(aLocaleName, aLocaleName, aLocaleName), xBI));
     550        2591 :             return sal_True;
     551             :         }
     552             :     }
     553        5267 :     return sal_False;
     554             : }
     555             : 
     556             : Reference < XBreakIterator > SAL_CALL
     557      449822 : BreakIteratorImpl::getLocaleSpecificBreakIterator(const Locale& rLocale) throw (RuntimeException)
     558             : {
     559      449822 :     if (xBI.is() && rLocale == aLocale)
     560      429045 :         return xBI;
     561       20777 :     else if (m_xContext.is()) {
     562       20777 :         aLocale = rLocale;
     563             : 
     564       53105 :         for (size_t i = 0; i < lookupTable.size(); i++) {
     565       50441 :             lookupTableItem *listItem = lookupTable[i];
     566       50441 :             if (rLocale == listItem->aLocale)
     567       38890 :                 return xBI = listItem->xBI;
     568             :         }
     569             : 
     570        2664 :         sal_Unicode under = (sal_Unicode)'_';
     571             : 
     572        2664 :         sal_Int32 l = rLocale.Language.getLength();
     573        2664 :         sal_Int32 c = rLocale.Country.getLength();
     574        2664 :         sal_Int32 v = rLocale.Variant.getLength();
     575        2664 :         OUStringBuffer aBuf(l+c+v+3);
     576             : 
     577       10639 :         if ((l > 0 && c > 0 && v > 0 &&
     578             :                     // load service with name <base>_<lang>_<country>_<varian>
     579           0 :                     createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
     580        5328 :                             rLocale.Country).append(under).append(rLocale.Variant).makeStringAndClear())) ||
     581        7929 :                 (l > 0 && c > 0 &&
     582             :                  // load service with name <base>_<lang>_<country>
     583        2641 :                  createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).append(
     584       13251 :                          rLocale.Country).makeStringAndClear())) ||
     585        2649 :                 (l > 0 && c > 0 && rLocale.Language.equalsAscii("zh") &&
     586           4 :                  (rLocale.Country.equalsAscii("HK") ||
     587           2 :                   rLocale.Country.equalsAscii("MO") ) &&
     588             :                  // if the country code is HK or MO, one more step to try TW.
     589           0 :                  createLocaleSpecificBreakIterator(aBuf.append(rLocale.Language).append(under).appendAscii(
     590        5328 :                          "TW").makeStringAndClear())) ||
     591        2647 :                 (l > 0 &&
     592             :                  // load service with name <base>_<lang>
     593       10597 :                  createLocaleSpecificBreakIterator(rLocale.Language)) ||
     594             :                 // load default service with name <base>_Unicode
     595        7950 :                 createLocaleSpecificBreakIterator(OUString("Unicode"))) {
     596        2664 :             lookupTable.push_back( new lookupTableItem(aLocale, xBI) );
     597        2664 :             return xBI;
     598           0 :         }
     599             :     }
     600           0 :     throw RuntimeException();
     601             : }
     602             : 
     603             : OUString SAL_CALL
     604           0 : BreakIteratorImpl::getImplementationName(void) throw( RuntimeException, std::exception )
     605             : {
     606           0 :     return OUString("com.sun.star.i18n.BreakIterator");
     607             : }
     608             : 
     609             : sal_Bool SAL_CALL
     610           0 : BreakIteratorImpl::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
     611             : {
     612           0 :     return cppu::supportsService(this, rServiceName);
     613             : }
     614             : 
     615             : Sequence< OUString > SAL_CALL
     616           0 : BreakIteratorImpl::getSupportedServiceNames(void) throw( RuntimeException, std::exception )
     617             : {
     618           0 :     Sequence< OUString > aRet(1);
     619           0 :     aRet[0] = OUString("com.sun.star.i18n.BreakIterator");
     620           0 :     return aRet;
     621             : }
     622             : 
     623             : } } } }
     624             : 
     625             : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
     626       16515 : com_sun_star_i18n_BreakIterator_get_implementation(
     627             :     css::uno::XComponentContext *context,
     628             :     css::uno::Sequence<css::uno::Any> const &)
     629             : {
     630       16515 :     return cppu::acquire(new com::sun::star::i18n::BreakIteratorImpl(context));
     631             : }
     632             : 
     633             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10