LCOV - code coverage report
Current view: top level - i18npool/source/breakiterator - breakiterator_unicode.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 193 236 81.8 %
Date: 2012-08-25 Functions: 16 19 84.2 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 227 367 61.9 %

           Branch data     Line data    Source code
       1                 :            : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2                 :            : /*
       3                 :            :  * This file is part of the LibreOffice project.
       4                 :            :  *
       5                 :            :  * This Source Code Form is subject to the terms of the Mozilla Public
       6                 :            :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7                 :            :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8                 :            :  *
       9                 :            :  * This file incorporates work covered by the following license notice:
      10                 :            :  *
      11                 :            :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12                 :            :  *   contributor license agreements. See the NOTICE file distributed
      13                 :            :  *   with this work for additional information regarding copyright
      14                 :            :  *   ownership. The ASF licenses this file to you under the Apache
      15                 :            :  *   License, Version 2.0 (the "License"); you may not use this file
      16                 :            :  *   except in compliance with the License. You may obtain a copy of
      17                 :            :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18                 :            :  */
      19                 :            : 
      20                 :            : #include <breakiterator_unicode.hxx>
      21                 :            : #include <localedata.hxx>
      22                 :            : #include <unicode/uchar.h>
      23                 :            : #include <unicode/locid.h>
      24                 :            : #include <unicode/rbbi.h>
      25                 :            : #include <unicode/udata.h>
      26                 :            : #include <rtl/strbuf.hxx>
      27                 :            : #include <rtl/ustring.hxx>
      28                 :            : #include <string.h>
      29                 :            : 
      30                 :            : U_CDECL_BEGIN
      31                 :            : extern const char OpenOffice_dat[];
      32                 :            : U_CDECL_END
      33                 :            : 
      34                 :            : using namespace ::com::sun::star;
      35                 :            : using namespace ::com::sun::star::lang;
      36                 :            : using namespace ::rtl;
      37                 :            : 
      38                 :            : namespace com { namespace sun { namespace star { namespace i18n {
      39                 :            : 
      40                 :            : #define ERROR ::com::sun::star::uno::RuntimeException()
      41                 :            : 
      42                 :            : 
      43                 :       1997 : BreakIterator_Unicode::BreakIterator_Unicode() :
      44                 :            :     cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" ),    // implementation name
      45                 :            :     wordRule( "word" ),
      46                 :            :     lineRule( "line" ),
      47                 :            :     result(),
      48                 :            :     character(),
      49                 :            :     word(),
      50                 :            :     sentence(),
      51                 :            :     line(),
      52                 :            :     icuBI( NULL ),
      53                 :            :     aLocale(),
      54                 :            :     aBreakType(),
      55                 :       1997 :     aWordType()
      56                 :            : {
      57                 :       1997 : }
      58                 :            : 
      59                 :            : 
      60 [ +  - ][ +  - ]:       1983 : BreakIterator_Unicode::~BreakIterator_Unicode()
         [ +  - ][ +  - ]
      61                 :            : {
      62 [ +  + ][ +  - ]:       1983 :         if (icuBI && icuBI->aBreakIterator) {
      63 [ +  - ][ +  - ]:       1976 :             delete icuBI->aBreakIterator;
      64                 :       1976 :             icuBI->aBreakIterator=NULL;
      65                 :            :         }
      66 [ +  + ][ +  - ]:       1983 :         if (character.aBreakIterator) delete character.aBreakIterator;
                 [ +  - ]
      67 [ +  + ][ +  - ]:       1983 :         if (word.aBreakIterator) delete word.aBreakIterator;
                 [ +  - ]
      68 [ +  + ][ +  - ]:       1983 :         if (sentence.aBreakIterator) delete sentence.aBreakIterator;
                 [ +  - ]
      69 [ +  + ][ +  - ]:       1983 :         if (line.aBreakIterator) delete line.aBreakIterator;
                 [ +  - ]
      70         [ -  + ]:       3960 : }
      71                 :            : 
      72                 :            : /*
      73                 :            :     Wrapper class to provide public access to the RuleBasedBreakIterator's
      74                 :            :     setbreakType method.
      75                 :            : */
      76         [ -  + ]:     676730 : class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator {
      77                 :            :     public:
      78                 :     338390 :         inline void publicSetBreakType(int32_t type) {
      79                 :     338390 :             setBreakType(type);
      80                 :     338390 :         };
      81                 :     338390 :         OOoRuleBasedBreakIterator(UDataMemory* image,
      82                 :            :                 UErrorCode &status) :
      83                 :     338390 :             RuleBasedBreakIterator(image, status) { };
      84                 :            : 
      85                 :            : };
      86                 :            : 
      87                 :            : // loading ICU breakiterator on demand.
      88                 :    3973247 : void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
      89                 :            :         sal_Int16 rBreakType, sal_Int16 rWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
      90                 :            : {
      91                 :    3973247 :     sal_Bool newBreak = sal_False;
      92                 :    3973247 :     UErrorCode status = U_ZERO_ERROR;
      93                 :    3973247 :     sal_Int16 breakType = 0;
      94   [ +  +  +  +  :    3973247 :     switch (rBreakType) {
                      - ]
      95                 :    3551402 :         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
      96                 :     277532 :         case LOAD_WORD_BREAKITERATOR: icuBI=&word;
      97   [ +  +  +  + ]:     277532 :             switch (rWordType) {
      98                 :       2198 :                 case WordType::ANYWORD_IGNOREWHITESPACES: breakType = 0; rule=wordRule = "edit_word"; break;
      99                 :     199712 :                 case WordType::DICTIONARY_WORD: breakType = 1; rule=wordRule = "dict_word"; break;
     100                 :      69110 :                 case WordType::WORD_COUNT: breakType = 2; rule=wordRule = "count_word"; break;
     101                 :            :             }
     102                 :     277532 :             break;
     103                 :       3910 :         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
     104                 :     140403 :         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
     105                 :            :     }
     106 [ +  + ][ +  +  :   14878401 :     if (!icuBI->aBreakIterator || rWordType != aWordType ||
          +  +  +  +  -  
              + ][ +  + ]
     107                 :    7270318 :             rLocale.Language != aLocale.Language || rLocale.Country != aLocale.Country ||
     108                 :    3634836 :             rLocale.Variant != aLocale.Variant) {
     109         [ +  + ]:     338411 :         if (icuBI->aBreakIterator) {
     110 [ +  - ][ +  - ]:     336242 :             delete icuBI->aBreakIterator;
     111                 :     336242 :             icuBI->aBreakIterator=NULL;
     112                 :            :         }
     113         [ +  + ]:     338411 :         if (rule) {
     114 [ +  - ][ +  - ]:     338396 :             uno::Sequence< OUString > breakRules = LocaleData().getBreakIteratorRules(rLocale);
                 [ +  - ]
     115                 :            : 
     116                 :     338396 :             status = U_ZERO_ERROR;
     117         [ +  - ]:     338396 :             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
     118 [ -  + ][ #  # ]:     338396 :             if ( !U_SUCCESS(status) ) throw ERROR;
     119                 :            : 
     120                 :     338396 :             OOoRuleBasedBreakIterator *rbi = NULL;
     121                 :            : 
     122 [ +  + ][ +  - ]:     338396 :             if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty())
         [ +  + ][ +  + ]
     123                 :            :             {
     124                 :            :                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
     125 [ +  - ][ +  - ]:         13 :                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
         [ +  - ][ +  - ]
                 [ +  - ]
     126                 :            :             }
     127 [ +  + ][ +  - ]:     338383 :             else if (rLocale.Language != "th" && rLocale.Language != "km") //use icu's breakiterator for Thai and Khmer
                 [ +  + ]
     128                 :            :             {
     129                 :     338377 :                 status = U_ZERO_ERROR;
     130                 :     338377 :                 OStringBuffer aUDName(64);
     131         [ +  - ]:     338377 :                 aUDName.append(rule);
     132         [ +  - ]:     338377 :                 aUDName.append('_');
     133 [ +  - ][ +  - ]:     338377 :                 aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
     134         [ +  - ]:     338377 :                 UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
     135         [ +  + ]:     338377 :                 if( U_SUCCESS(status) )
     136 [ +  - ][ +  - ]:          6 :                     rbi = new OOoRuleBasedBreakIterator( pUData, status);
     137         [ +  + ]:     338377 :                 if (!U_SUCCESS(status) ) {
     138                 :     338371 :                     status = U_ZERO_ERROR;
     139         [ +  - ]:     338371 :                     pUData = udata_open("OpenOffice", "brk", rule, &status);
     140         [ +  - ]:     338371 :                     if( U_SUCCESS(status) )
     141 [ +  - ][ +  - ]:     338371 :                         rbi = new OOoRuleBasedBreakIterator( pUData, status);
     142         [ -  + ]:     338371 :                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
     143                 :     338377 :                 }
     144                 :            :             }
     145         [ +  + ]:     338396 :             if (rbi) {
     146   [ +  +  +  +  :     338390 :                 switch (rBreakType) {
                      - ]
     147         [ +  - ]:      69141 :                     case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
     148         [ +  - ]:     170074 :                     case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
     149         [ +  - ]:         19 :                     case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
     150         [ +  - ]:      99156 :                     case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
     151                 :            :                 }
     152                 :     338390 :                 icuBI->aBreakIterator = rbi;
     153         [ +  - ]:     338396 :             }
     154                 :            :         }
     155                 :            : 
     156         [ +  + ]:     338411 :         if (!icuBI->aBreakIterator) {
     157                 :            :             icu::Locale icuLocale(
     158                 :            :                     OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US).getStr(),
     159                 :            :                     OUStringToOString(rLocale.Country, RTL_TEXTENCODING_ASCII_US).getStr(),
     160 [ +  - ][ +  - ]:         21 :                     OUStringToOString(rLocale.Variant, RTL_TEXTENCODING_ASCII_US).getStr());
         [ +  - ][ +  - ]
     161                 :            : 
     162                 :         21 :             status = U_ZERO_ERROR;
     163   [ -  +  -  -  :         21 :             switch (rBreakType) {
                      - ]
     164                 :            :                 case LOAD_CHARACTER_BREAKITERATOR:
     165         [ #  # ]:          0 :                     icuBI->aBreakIterator =  icu::BreakIterator::createCharacterInstance(icuLocale, status);
     166                 :          0 :                     break;
     167                 :            :                 case LOAD_WORD_BREAKITERATOR:
     168         [ +  - ]:         21 :                     icuBI->aBreakIterator =  icu::BreakIterator::createWordInstance(icuLocale, status);
     169                 :         21 :                     break;
     170                 :            :                 case LOAD_SENTENCE_BREAKITERATOR:
     171         [ #  # ]:          0 :                     icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
     172                 :          0 :                     break;
     173                 :            :                 case LOAD_LINE_BREAKITERATOR:
     174         [ #  # ]:          0 :                     icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
     175                 :          0 :                     break;
     176                 :            :             }
     177         [ -  + ]:         21 :             if ( !U_SUCCESS(status) ) {
     178                 :          0 :                 icuBI->aBreakIterator=NULL;
     179         [ #  # ]:          0 :                 throw ERROR;
     180         [ +  - ]:         21 :             }
     181                 :            :         }
     182         [ +  - ]:     338411 :         if (icuBI->aBreakIterator) {
     183                 :     338411 :             aLocale=rLocale;
     184                 :     338411 :             aWordType=rWordType;
     185                 :     338411 :             aBreakType=rBreakType;
     186                 :     338411 :             newBreak=sal_True;
     187                 :            :         } else {
     188         [ #  # ]:          0 :             throw ERROR;
     189                 :            :         }
     190                 :            :     }
     191                 :            : 
     192 [ +  + ][ +  + ]:    3973247 :     if (newBreak || !icuBI->aICUText.equals(rText))
                 [ +  + ]
     193                 :            :     {
     194                 :            :         // UChar != sal_Unicode in MinGW
     195                 :     359407 :         const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
     196                 :            : 
     197         [ +  - ]:     359407 :         icuBI->ut = utext_openUChars(icuBI->ut, pText, rText.getLength(), &status);
     198                 :            : 
     199         [ -  + ]:     359407 :         if (!U_SUCCESS(status))
     200         [ #  # ]:          0 :             throw ERROR;
     201                 :            : 
     202         [ +  - ]:     359407 :         icuBI->aBreakIterator->setText(icuBI->ut, status);
     203                 :            : 
     204         [ -  + ]:     359407 :         if (!U_SUCCESS(status))
     205         [ #  # ]:          0 :             throw ERROR;
     206                 :            : 
     207                 :     359407 :         icuBI->aICUText = rText;
     208                 :            :     }
     209                 :    3973247 : }
     210                 :            : 
     211                 :    3646773 : sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
     212                 :            :         sal_Int32 nStartPos, const lang::Locale &rLocale,
     213                 :            :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     214                 :            :         throw(uno::RuntimeException)
     215                 :            : {
     216         [ +  + ]:    3646773 :         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     217                 :    3551378 :             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     218         [ +  + ]:    7098912 :             for (nDone = 0; nDone < nCount; nDone++) {
     219                 :    3547534 :                 nStartPos = character.aBreakIterator->following(nStartPos);
     220         [ -  + ]:    3547534 :                 if (nStartPos == BreakIterator::DONE)
     221                 :          0 :                     return Text.getLength();
     222                 :            :             }
     223                 :            :         } else { // for CHARACTER mode
     224 [ +  + ][ +  - ]:     190790 :             for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
                 [ +  + ]
     225                 :      95395 :                 Text.iterateCodePoints(&nStartPos, 1);
     226                 :            :         }
     227                 :    3646773 :         return nStartPos;
     228                 :            : }
     229                 :            : 
     230                 :      69078 : sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
     231                 :            :         sal_Int32 nStartPos, const lang::Locale& rLocale,
     232                 :            :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     233                 :            :         throw(uno::RuntimeException)
     234                 :            : {
     235         [ +  + ]:      69078 :         if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     236                 :         24 :             loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     237         [ +  + ]:         48 :             for (nDone = 0; nDone < nCount; nDone++) {
     238                 :         24 :                 nStartPos = character.aBreakIterator->preceding(nStartPos);
     239         [ -  + ]:         24 :                 if (nStartPos == BreakIterator::DONE)
     240                 :          0 :                     return 0;
     241                 :            :             }
     242                 :            :         } else { // for BS to delete one char and CHARACTER mode.
     243 [ +  + ][ +  - ]:     138108 :             for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
                 [ +  + ]
     244                 :      69054 :                 Text.iterateCodePoints(&nStartPos, -1);
     245                 :            :         }
     246                 :      69078 :         return nStartPos;
     247                 :            : }
     248                 :            : 
     249                 :            : 
     250                 :        857 : Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
     251                 :            :     const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException)
     252                 :            : {
     253                 :        857 :         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     254                 :            : 
     255                 :        857 :         result.startPos = word.aBreakIterator->following(nStartPos);
     256 [ -  + ][ +  + ]:        857 :         if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
                 [ +  + ]
     257                 :         46 :             result.endPos = result.startPos;
     258                 :            :         else {
     259         [ +  + ]:       1622 :             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
           [ +  -  +  + ]
                 [ +  + ]
     260                 :            :                     rWordType == WordType::DICTIONARY_WORD ) &&
     261                 :        811 :                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     262                 :         12 :                 result.startPos = word.aBreakIterator->following(result.startPos);
     263                 :            : 
     264                 :        811 :             result.endPos = word.aBreakIterator->following(result.startPos);
     265         [ -  + ]:        811 :             if(result.endPos == BreakIterator::DONE)
     266                 :          0 :                 result.endPos = result.startPos;
     267                 :            :         }
     268                 :        857 :         return result;
     269                 :            : }
     270                 :            : 
     271                 :            : 
     272                 :         40 : Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
     273                 :            :         const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException)
     274                 :            : {
     275                 :         40 :         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     276                 :            : 
     277                 :         40 :         result.startPos = word.aBreakIterator->preceding(nStartPos);
     278 [ -  + ][ +  - ]:         40 :         if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
     279                 :          0 :             result.endPos = result.startPos;
     280                 :            :         else {
     281         [ +  + ]:         80 :             if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
           [ +  -  -  + ]
                 [ -  + ]
     282                 :            :                     rWordType == WordType::DICTIONARY_WORD) &&
     283                 :         40 :                         u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     284                 :          0 :                 result.startPos = word.aBreakIterator->preceding(result.startPos);
     285                 :            : 
     286                 :         40 :             result.endPos = word.aBreakIterator->following(result.startPos);
     287         [ -  + ]:         40 :             if(result.endPos == BreakIterator::DONE)
     288                 :          0 :                 result.endPos = result.startPos;
     289                 :            :         }
     290                 :         40 :         return result;
     291                 :            : }
     292                 :            : 
     293                 :            : 
     294                 :     276635 : Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
     295                 :            :         sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException)
     296                 :            : {
     297                 :     276635 :         loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     298                 :     276635 :         sal_Int32 len = Text.getLength();
     299                 :            : 
     300         [ +  + ]:     276635 :         if(word.aBreakIterator->isBoundary(nPos)) {
     301                 :     150984 :             result.startPos = result.endPos = nPos;
     302 [ +  + ][ -  + ]:     150984 :             if((bDirection || nPos == 0) && nPos < len) //forward
                 [ +  - ]
     303                 :     148674 :                 result.endPos = word.aBreakIterator->following(nPos);
     304                 :            :             else
     305                 :     150984 :                 result.startPos = word.aBreakIterator->preceding(nPos);
     306                 :            :         } else {
     307         [ -  + ]:     125651 :             if(nPos <= 0) {
     308                 :          0 :                 result.startPos = 0;
     309         [ #  # ]:          0 :                 result.endPos = len ? word.aBreakIterator->following((sal_Int32)0) : 0;
     310         [ -  + ]:     125651 :             } else if(nPos >= len) {
     311                 :          0 :                 result.startPos = word.aBreakIterator->preceding(len);
     312                 :          0 :                 result.endPos = len;
     313                 :            :             } else {
     314                 :     125651 :                 result.startPos = word.aBreakIterator->preceding(nPos);
     315                 :     125651 :                 result.endPos = word.aBreakIterator->following(nPos);
     316                 :            :             }
     317                 :            :         }
     318         [ -  + ]:     276635 :         if (result.startPos == BreakIterator::DONE)
     319                 :          0 :             result.startPos = result.endPos;
     320         [ -  + ]:     276635 :         else if (result.endPos == BreakIterator::DONE)
     321                 :          0 :             result.endPos = result.startPos;
     322                 :            : 
     323                 :     276635 :         return result;
     324                 :            : }
     325                 :            : 
     326                 :            : 
     327                 :         30 : sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
     328                 :            :         const lang::Locale &rLocale ) throw(uno::RuntimeException)
     329                 :            : {
     330                 :         30 :         loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     331                 :            : 
     332                 :         30 :         sal_Int32 len = Text.getLength();
     333 [ +  + ][ +  - ]:         30 :         if (len > 0 && nStartPos == len)
     334                 :          6 :             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     335         [ +  + ]:         30 :         if (!sentence.aBreakIterator->isBoundary(nStartPos))
     336                 :         18 :             nStartPos = sentence.aBreakIterator->preceding(nStartPos);
     337                 :            : 
     338                 :            :         // skip preceding space.
     339                 :         30 :         sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
     340 [ +  + ][ +  + ]:         44 :         while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
                 [ +  + ]
     341                 :         30 :         Text.iterateCodePoints(&nStartPos, -1);
     342                 :            : 
     343                 :         30 :         return nStartPos;
     344                 :            : }
     345                 :            : 
     346                 :       3880 : sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
     347                 :            :         const lang::Locale &rLocale ) throw(uno::RuntimeException)
     348                 :            : {
     349         [ +  - ]:       3880 :         loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     350                 :            : 
     351                 :       3880 :         sal_Int32 len = Text.getLength();
     352 [ +  + ][ +  - ]:       3880 :         if (len > 0 && nStartPos == len)
     353         [ +  - ]:         16 :             Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     354         [ +  - ]:       3880 :         nStartPos = sentence.aBreakIterator->following(nStartPos);
     355                 :            : 
     356                 :       3880 :         sal_Int32 nPos=nStartPos;
     357 [ +  + ][ +  - ]:       3926 :         while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
         [ +  - ][ +  + ]
                 [ +  + ]
     358                 :            : 
     359                 :       3880 :         return nStartPos;
     360                 :            : }
     361                 :            : 
     362                 :     140403 : LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
     363                 :            :         const OUString& Text, sal_Int32 nStartPos,
     364                 :            :         const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
     365                 :            :         const LineBreakHyphenationOptions& hOptions,
     366                 :            :         const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException)
     367                 :            : {
                                                // Determines where the line in Text can be broken at or before
                                                // nStartPos and reports the position (breakIndex) together with the
                                                // kind of break (breakType).
                                                //
                                                // Text         - text to search for a break position.
                                                // nStartPos    - index from which the search runs backwards.
                                                // rLocale      - locale used to load the ICU line break iterator and
                                                //                passed on to the word boundary lookup and hyphenator.
                                                // nMinBreakPos - only consulted in the hyphenation branch: a
                                                //                hyphenation position before it yields breakIndex -1.
                                                // hOptions     - hyphenator reference, hyphen index and hyphenation
                                                //                options; hyphenation is skipped if rHyphenator is
                                                //                not set.
                                                // (5th option parameter is unnamed and unused.)
                                                //
                                                // Returns a LineBreakResults whose breakType is WORDBOUNDARY or
                                                // HYPHENATION; rHyphenatedWord is only assigned for HYPHENATION.
     368                 :     140403 :         LineBreakResults lbr;
     369                 :            : 
                                                // Start position at/after the end of the text: break at the text end.
     370         [ -  + ]:     140403 :         if (nStartPos >= Text.getLength()) {
     371                 :          0 :             lbr.breakIndex = Text.getLength();
     372                 :          0 :             lbr.breakType = BreakType::WORDBOUNDARY;
     373                 :          0 :             return lbr;
     374                 :            :         }
     375                 :            : 
     376         [ +  - ]:     140403 :         loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
     377                 :            : 
                                                // GlueSpace is set again below whenever a Word Joiner (WJ) is met
                                                // while stepping back from the found break; the search is then
                                                // repeated from the position in front of the skipped characters.
     378                 :     140403 :         sal_Bool GlueSpace=sal_True;
     379         [ +  + ]:     280806 :         while (GlueSpace) {
                                                    // preceding(nStartPos + 1) == nStartPos means the iterator
                                                    // reports a boundary exactly at nStartPos.
     380 [ +  - ][ +  + ]:     140403 :             if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
     381                 :      34978 :                 lbr.breakIndex = nStartPos;
     382                 :      34978 :                 lbr.breakType = BreakType::WORDBOUNDARY;
     383         [ +  + ]:     105425 :             } else if (hOptions.rHyphenator.is()) { //Hyphenation break
                                                        // Hand the dictionary word around nStartPos to the
                                                        // hyphenator; the hyphen index is made relative to the
                                                        // start of that word.
                                                        // NOTE(review): the (sal_Int16) cast may truncate for large
                                                        // offsets - confirm the expected value range.
     384                 :            :                 Boundary wBoundary = getWordBoundary( Text, nStartPos, rLocale,
     385         [ +  - ]:      64134 :                                                 WordType::DICTIONARY_WORD, false);
     386                 :      64134 :                 uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
     387         [ +  - ]:      64134 :                 aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
     388                 :            :                     wBoundary.endPos - wBoundary.startPos), rLocale,
     389 [ +  - ][ +  - ]:      64134 :                     (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos), hOptions.aHyphenationOptions);
     390         [ -  + ]:      64134 :                 if (aHyphenatedWord.is()) {
     391         [ #  # ]:          0 :                     lbr.rHyphenatedWord = aHyphenatedWord;
                                                            // Hyphenation position (made absolute, plus one) in front
                                                            // of nMinBreakPos: report -1; otherwise report the start
                                                            // of the word, not the hyphenation position itself.
     392 [ #  # ][ #  # ]:          0 :                     if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
                 [ #  # ]
     393                 :          0 :                         lbr.breakIndex = -1;
     394                 :            :                     else
     395                 :          0 :                         lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
     396                 :          0 :                     lbr.breakType = BreakType::HYPHENATION;
     397                 :            :                 } else {
                                                            // Hyphenator returned nothing: fall back to the line
                                                            // boundary preceding nStartPos.
     398         [ +  - ]:      64134 :                     lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     399                 :      64134 :                     lbr.breakType = BreakType::WORDBOUNDARY;;
     400                 :      64134 :                 }
     401                 :            :             } else { //word boundary break
     402         [ +  - ]:      41291 :                 lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     403                 :      41291 :                 lbr.breakType = BreakType::WORDBOUNDARY;
     404                 :            :             }
     405                 :            : 
                                                    // NOTE(review): WJ is defined in the middle of the function and
                                                    // not #undef'd in the visible code, so it stays defined for the
                                                    // rest of the translation unit.
     406                 :            : #define WJ 0x2060   // Word Joiner
     407                 :     140403 :             GlueSpace=sal_False;
     408         [ +  - ]:     140403 :             if (lbr.breakType == BreakType::WORDBOUNDARY) {
     409                 :     140403 :                 nStartPos = lbr.breakIndex;
                                                        // Tests the character at the break index, then steps one
                                                        // code unit back regardless of the outcome.
                                                        // NOTE(review): there is no range check before Text[...];
                                                        // preceding() presumably can return a negative "no boundary"
                                                        // value (e.g. for nStartPos == 0) - confirm this is safe.
     410         [ -  + ]:     140403 :                 if (Text[nStartPos--] == WJ)
     411                 :          0 :                     GlueSpace=sal_True;
                                                        // Walk backwards over whitespace and Word Joiners in front
                                                        // of the break; any Word Joiner requests another pass.
                                                        // iterateCodePoints(&nStartPos, 0) presumably reads the code
                                                        // point at nStartPos without moving the index - TODO confirm.
     412         [ +  + ]:     416223 :                 while (nStartPos >= 0 &&
                   [ +  +  -  + ]
                 [ +  + ]
     413 [ +  - ][ +  - ]:     206794 :                     (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
     414         [ -  + ]:      69026 :                     if (Text[nStartPos--] == WJ)
     415                 :          0 :                         GlueSpace=sal_True;
     416                 :            :                 }
                                                        // A Word Joiner was seen and the scan ran off the start of
                                                        // the text: nothing left to search, break at index 0.
     417 [ -  + ][ #  # ]:     140403 :                 if (GlueSpace && nStartPos < 0)  {
     418                 :          0 :                     lbr.breakIndex = 0;
     419                 :          0 :                     break;
     420                 :            :                 }
     421                 :            :             }
     422                 :            :         }
     423                 :            : 
     424                 :     140403 :         return lbr;
     425                 :            : }
     426                 :            : 
     427                 :            : 
     428                 :            : 
     429                 :            : OUString SAL_CALL
     430                 :          0 : BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException )
     431                 :            : {
                                                // Returns the implementation name: an OUString built from the ASCII
                                                // string cBreakIterator (declared outside this part of the file).
     432                 :          0 :         return OUString::createFromAscii(cBreakIterator);
     433                 :            : }
     434                 :            : 
     435                 :            : sal_Bool SAL_CALL
     436                 :          0 : BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException )
     437                 :            : {
                                                // True exactly when compareToAscii() returns 0 for rServiceName
                                                // against cBreakIterator, i.e. the two names compare equal.
     438                 :          0 :         return !rServiceName.compareToAscii(cBreakIterator);
     439                 :            : }
     440                 :            : 
     441                 :            : uno::Sequence< OUString > SAL_CALL
     442                 :          0 : BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException )
     443                 :            : {
                                                // Returns a one-element sequence whose only entry is the name built
                                                // from cBreakIterator - the same string supportsService() compares
                                                // against and getImplementationName() returns.
     444                 :          0 :         uno::Sequence< OUString > aRet(1);
     445         [ #  # ]:          0 :         aRet[0] = OUString::createFromAscii(cBreakIterator);
     446                 :          0 :         return aRet;
     447                 :            : }
     448                 :            : 
     449                 :            : } } } }
     450                 :            : 
     451                 :            : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10