LCOV - code coverage report
Current view: top level - i18npool/source/breakiterator - breakiterator_unicode.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 209 254 82.3 %
Date: 2015-06-13 12:38:46 Functions: 17 20 85.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <breakiterator_unicode.hxx>
      21             : #include <cppuhelper/supportsservice.hxx>
      22             : #include <localedata.hxx>
      23             : #include <i18nlangtag/languagetag.hxx>
      24             : #include <i18nlangtag/languagetagicu.hxx>
      25             : #include <unicode/uchar.h>
      26             : #include <unicode/locid.h>
      27             : #include <unicode/rbbi.h>
      28             : #include <unicode/udata.h>
      29             : #include <rtl/strbuf.hxx>
      30             : #include <rtl/ustring.hxx>
      31             : #include <string.h>
      32             : 
      33             : U_CDECL_BEGIN
      34             : extern const char OpenOffice_dat[];
      35             : U_CDECL_END
      36             : 
      37             : using namespace ::com::sun::star;
      38             : using namespace ::com::sun::star::lang;
      39             : 
      40             : namespace com { namespace sun { namespace star { namespace i18n {
      41             : 
      42             : 
      43        5086 : BreakIterator_Unicode::BreakIterator_Unicode()
      44             :     : cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" )    // implementation name
      45             :     , lineRule( "line" )
      46             :     , icuBI( NULL )
      47        5086 :     , aBreakType(0)
      48             : {
      49        5086 : }
      50             : 
      51       14564 : BreakIterator_Unicode::~BreakIterator_Unicode()
      52             : {
      53        5056 :     delete character.aBreakIterator;
      54        5056 :     delete sentence.aBreakIterator;
      55        5056 :     delete line.aBreakIterator;
      56       25280 :     for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
      57       20224 :         delete words[i].aBreakIterator;
      58        9508 : }
      59             : 
      60             : /*
      61             :     Wrapper class to provide public access to the RuleBasedBreakIterator's
      62             :     setbreakType method.
      63             : */
      64       11028 : class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator
      65             : {
      66             :     public:
      67        5572 :     inline void publicSetBreakType(int32_t type)
      68             :         {
      69        5572 :             setBreakType(type);
      70        5572 :         };
      71        5572 :     OOoRuleBasedBreakIterator(UDataMemory* image,
      72             :                               UErrorCode &status)
      73        5572 :         : RuleBasedBreakIterator(image, status)
      74        5572 :         { };
      75             : 
      76             : };
      77             : 
      78             : // loading ICU breakiterator on demand.
      79   142910742 : void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
      80             :         sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
      81             : {
      82   142910742 :     bool newBreak = false;
      83   142910742 :     UErrorCode status = U_ZERO_ERROR;
      84   142910742 :     sal_Int16 breakType = 0;
      85   142910742 :     switch (rBreakType) {
      86     8619790 :         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
      87             :         case LOAD_WORD_BREAKITERATOR:
      88             :             assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
      89   134247732 :             icuBI=&words[nWordType];
      90   134247732 :             switch (nWordType) {
      91        2616 :                 case WordType::ANY_WORD: break; // odd but previous behavior
      92             :                 case WordType::ANYWORD_IGNOREWHITESPACES:
      93         932 :                     breakType = 0; rule = "edit_word"; break;
      94             :                 case WordType::DICTIONARY_WORD:
      95   133985831 :                     breakType = 1; rule = "dict_word"; break;
      96             :                 default:
      97             :                 case WordType::WORD_COUNT:
      98      258353 :                     breakType = 2; rule = "count_word"; break;
      99             :             }
     100   134247732 :             break;
     101         697 :         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
     102       42523 :         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
     103             :     }
     104   428727008 :     if (!icuBI->aBreakIterator ||
     105   285810771 :         rLocale.Language != icuBI->maLocale.Language ||
     106   428721092 :         rLocale.Country  != icuBI->maLocale.Country  ||
     107   142905103 :         rLocale.Variant  != icuBI->maLocale.Variant) {
     108        5639 :         if (icuBI->aBreakIterator) {
     109         421 :             delete icuBI->aBreakIterator;
     110         421 :             icuBI->aBreakIterator=NULL;
     111             :         }
     112        5639 :         if (rule) {
     113        5576 :             uno::Sequence< OUString > breakRules = LocaleDataImpl().getBreakIteratorRules(rLocale);
     114             : 
     115        5576 :             status = U_ZERO_ERROR;
     116        5576 :             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
     117        5576 :             if ( !U_SUCCESS(status) ) throw uno::RuntimeException();
     118             : 
     119        5576 :             OOoRuleBasedBreakIterator *rbi = NULL;
     120             : 
     121        5576 :             if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty())
     122             :             {
     123             :                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
     124          55 :                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
     125             :             }
     126             :             //use icu's breakiterator for Thai, Khmer, Tibetan and Dzongkha
     127        5521 :             else if (rLocale.Language != "th" && rLocale.Language != "lo" && rLocale.Language != "km" && rLocale.Language != "bo" && rLocale.Language != "dz")
     128             :             {
     129        5517 :                 status = U_ZERO_ERROR;
     130        5517 :                 OStringBuffer aUDName(64);
     131        5517 :                 aUDName.append(rule);
     132        5517 :                 aUDName.append('_');
     133        5517 :                 aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
     134        5517 :                 UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
     135        5517 :                 if( U_SUCCESS(status) )
     136           5 :                     rbi = new OOoRuleBasedBreakIterator( pUData, status);
     137        5517 :                 if (!U_SUCCESS(status) ) {
     138        5512 :                     status = U_ZERO_ERROR;
     139        5512 :                     pUData = udata_open("OpenOffice", "brk", rule, &status);
     140        5512 :                     if( U_SUCCESS(status) )
     141        5512 :                         rbi = new OOoRuleBasedBreakIterator( pUData, status);
     142        5512 :                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
     143        5517 :                 }
     144             :             }
     145        5576 :             if (rbi) {
     146        5572 :                 switch (rBreakType) {
     147        3725 :                     case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
     148        1155 :                     case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
     149          57 :                     case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
     150         635 :                     case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
     151             :                 }
     152        5572 :                 icuBI->aBreakIterator = rbi;
     153        5576 :             }
     154             :         }
     155             : 
     156        5639 :         if (!icuBI->aBreakIterator) {
     157          67 :             icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale)));
     158             : 
     159          67 :             status = U_ZERO_ERROR;
     160          67 :             switch (rBreakType) {
     161             :                 case LOAD_CHARACTER_BREAKITERATOR:
     162           0 :                     icuBI->aBreakIterator =  icu::BreakIterator::createCharacterInstance(icuLocale, status);
     163           0 :                     break;
     164             :                 case LOAD_WORD_BREAKITERATOR:
     165          67 :                     icuBI->aBreakIterator =  icu::BreakIterator::createWordInstance(icuLocale, status);
     166          67 :                     break;
     167             :                 case LOAD_SENTENCE_BREAKITERATOR:
     168           0 :                     icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
     169           0 :                     break;
     170             :                 case LOAD_LINE_BREAKITERATOR:
     171           0 :                     icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
     172           0 :                     break;
     173             :             }
     174          67 :             if ( !U_SUCCESS(status) ) {
     175           0 :                 icuBI->aBreakIterator=NULL;
     176           0 :                 throw uno::RuntimeException();
     177          67 :             }
     178             :         }
     179        5639 :         if (icuBI->aBreakIterator) {
     180        5639 :             icuBI->maLocale=rLocale;
     181        5639 :             newBreak=true;
     182             :         } else {
     183           0 :             throw uno::RuntimeException();
     184             :         }
     185             :     }
     186             : 
     187   142910742 :     if (newBreak || icuBI->aICUText.pData != rText.pData)
     188             :     {
     189             :         // UChar != sal_Unicode in MinGW
     190      372568 :         const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
     191             : 
     192      372568 :         icuBI->ut = utext_openUChars(icuBI->ut, pText, rText.getLength(), &status);
     193             : 
     194      372568 :         if (!U_SUCCESS(status))
     195           0 :             throw uno::RuntimeException();
     196             : 
     197      372568 :         icuBI->aBreakIterator->setText(icuBI->ut, status);
     198             : 
     199      372568 :         if (!U_SUCCESS(status))
     200           0 :             throw uno::RuntimeException();
     201             : 
     202      372568 :         icuBI->aICUText = rText;
     203             :     }
     204   142910742 : }
     205             : 
     206     8604646 : sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
     207             :         sal_Int32 nStartPos, const lang::Locale &rLocale,
     208             :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     209             :         throw(uno::RuntimeException, std::exception)
     210             : {
     211     8604646 :     if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     212     8555834 :         loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     213    17110987 :         for (nDone = 0; nDone < nCount; nDone++) {
     214     8555193 :             nStartPos = character.aBreakIterator->following(nStartPos);
     215     8555193 :             if (nStartPos == BreakIterator::DONE)
     216          40 :                 return Text.getLength();
     217             :         }
     218             :     } else { // for CHARACTER mode
     219       97624 :         for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
     220       48812 :             Text.iterateCodePoints(&nStartPos, 1);
     221             :     }
     222     8604606 :     return nStartPos;
     223             : }
     224             : 
     225       98900 : sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
     226             :         sal_Int32 nStartPos, const lang::Locale& rLocale,
     227             :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     228             :         throw(uno::RuntimeException, std::exception)
     229             : {
     230       98900 :     if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     231       63956 :         loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     232      127912 :         for (nDone = 0; nDone < nCount; nDone++) {
     233       63956 :             nStartPos = character.aBreakIterator->preceding(nStartPos);
     234       63956 :             if (nStartPos == BreakIterator::DONE)
     235           0 :                 return 0;
     236             :         }
     237             :     } else { // for BS to delete one char and CHARACTER mode.
     238       69888 :         for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
     239       34944 :             Text.iterateCodePoints(&nStartPos, -1);
     240             :     }
     241       98900 :     return nStartPos;
     242             : }
     243             : 
     244             : 
     245        2799 : Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
     246             :     const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException, std::exception)
     247             : {
     248        2799 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     249             : 
     250        2799 :     result.startPos = icuBI->aBreakIterator->following(nStartPos);
     251        2799 :     if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
     252         212 :         result.endPos = result.startPos;
     253             :     else {
     254        4806 :         if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
     255        5174 :                     rWordType == WordType::DICTIONARY_WORD ) &&
     256        2587 :                 u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     257          59 :             result.startPos = icuBI->aBreakIterator->following(result.startPos);
     258             : 
     259        2587 :         result.endPos = icuBI->aBreakIterator->following(result.startPos);
     260        2587 :         if(result.endPos == BreakIterator::DONE)
     261           0 :             result.endPos = result.startPos;
     262             :     }
     263        2799 :     return result;
     264             : }
     265             : 
     266             : 
     267         429 : Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
     268             :         const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException, std::exception)
     269             : {
     270         429 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     271             : 
     272         429 :     result.startPos = icuBI->aBreakIterator->preceding(nStartPos);
     273         429 :     if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
     274           0 :         result.endPos = result.startPos;
     275             :     else {
     276         432 :         if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
     277         858 :                     rWordType == WordType::DICTIONARY_WORD) &&
     278         429 :                 u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     279           0 :             result.startPos = icuBI->aBreakIterator->preceding(result.startPos);
     280             : 
     281         429 :         result.endPos = icuBI->aBreakIterator->following(result.startPos);
     282         429 :         if(result.endPos == BreakIterator::DONE)
     283           0 :             result.endPos = result.startPos;
     284             :     }
     285         429 :     return result;
     286             : }
     287             : 
     288             : 
     289   134244504 : Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
     290             :         sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException, std::exception)
     291             : {
     292   134244504 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     293   134244504 :     sal_Int32 len = Text.getLength();
     294             : 
     295   134244504 :     if(icuBI->aBreakIterator->isBoundary(nPos)) {
     296   134188880 :         result.startPos = result.endPos = nPos;
     297   134188880 :         if((bDirection || nPos == 0) && nPos < len) //forward
     298    97481263 :             result.endPos = icuBI->aBreakIterator->following(nPos);
     299             :         else
     300    36707617 :             result.startPos = icuBI->aBreakIterator->preceding(nPos);
     301             :     } else {
     302       55624 :         if(nPos <= 0) {
     303           0 :             result.startPos = 0;
     304           0 :             result.endPos = len ? icuBI->aBreakIterator->following((sal_Int32)0) : 0;
     305       55624 :         } else if(nPos >= len) {
     306           0 :             result.startPos = icuBI->aBreakIterator->preceding(len);
     307           0 :             result.endPos = len;
     308             :         } else {
     309       55624 :             result.startPos = icuBI->aBreakIterator->preceding(nPos);
     310       55624 :             result.endPos = icuBI->aBreakIterator->following(nPos);
     311             :         }
     312             :     }
     313   134244504 :     if (result.startPos == BreakIterator::DONE)
     314           0 :         result.startPos = result.endPos;
     315   134244504 :     else if (result.endPos == BreakIterator::DONE)
     316           0 :         result.endPos = result.startPos;
     317             : 
     318   134244504 :     return result;
     319             : }
     320             : 
     321             : 
     322           8 : sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
     323             :         const lang::Locale &rLocale ) throw(uno::RuntimeException, std::exception)
     324             : {
     325           8 :     loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     326             : 
     327           8 :     sal_Int32 len = Text.getLength();
     328           8 :     if (len > 0 && nStartPos == len)
     329           1 :         Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     330           8 :     if (!sentence.aBreakIterator->isBoundary(nStartPos))
     331           6 :         nStartPos = sentence.aBreakIterator->preceding(nStartPos);
     332             : 
     333             :     // skip preceding space.
     334           8 :     sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
     335           8 :     while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
     336           8 :     Text.iterateCodePoints(&nStartPos, -1);
     337             : 
     338           8 :     return nStartPos;
     339             : }
     340             : 
     341         689 : sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
     342             :         const lang::Locale &rLocale ) throw(uno::RuntimeException, std::exception)
     343             : {
     344         689 :     loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     345             : 
     346         689 :     sal_Int32 len = Text.getLength();
     347         689 :     if (len > 0 && nStartPos == len)
     348          41 :         Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     349         689 :     nStartPos = sentence.aBreakIterator->following(nStartPos);
     350             : 
     351         689 :     sal_Int32 nPos=nStartPos;
     352         689 :     while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
     353             : 
     354         689 :     return nStartPos;
     355             : }
     356             : 
     357       42523 : LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
     358             :         const OUString& Text, sal_Int32 nStartPos,
     359             :         const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
     360             :         const LineBreakHyphenationOptions& hOptions,
     361             :         const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException, std::exception)
     362             : {
     363       42523 :     LineBreakResults lbr;
     364             : 
     365       42523 :     if (nStartPos >= Text.getLength()) {
     366           0 :         lbr.breakIndex = Text.getLength();
     367           0 :         lbr.breakType = BreakType::WORDBOUNDARY;
     368           0 :         return lbr;
     369             :     }
     370             : 
     371       42523 :     loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
     372             : 
     373       42523 :     bool GlueSpace=true;
     374      127569 :     while (GlueSpace) {
     375       42523 :         if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
     376        4464 :             lbr.breakIndex = nStartPos;
     377        4464 :             lbr.breakType = BreakType::WORDBOUNDARY;
     378       38059 :         } else if (hOptions.rHyphenator.is()) { //Hyphenation break
     379        3885 :             sal_Int32 boundary_with_punctuation = (line.aBreakIterator->next() != BreakIterator::DONE) ? line.aBreakIterator->current() : 0;
     380        3885 :             line.aBreakIterator->preceding(nStartPos + 1); // reset to check correct hyphenation of "word-word"
     381             : 
     382        3885 :             sal_Int32 nStartPosWordEnd = nStartPos;
     383        7876 :             while (line.aBreakIterator->current() < nStartPosWordEnd && u_ispunct((sal_uInt32)Text[nStartPosWordEnd])) // starting punctuation
     384         106 :                 nStartPosWordEnd --;
     385             : 
     386             :             Boundary wBoundary = getWordBoundary( Text, nStartPosWordEnd, rLocale,
     387        3885 :                 WordType::DICTIONARY_WORD, false);
     388             : 
     389        3885 :             nStartPosWordEnd = wBoundary.endPos;
     390        7880 :             while (nStartPosWordEnd < Text.getLength() && (u_ispunct((sal_uInt32)Text[nStartPosWordEnd]))) // ending punctuation
     391         110 :                 nStartPosWordEnd ++;
     392        3885 :             nStartPosWordEnd = nStartPosWordEnd - wBoundary.endPos;
     393        3885 :             if (hOptions.hyphenIndex - wBoundary.startPos < nStartPosWordEnd) nStartPosWordEnd = hOptions.hyphenIndex - wBoundary.startPos;
     394             : #define SPACE 0x0020
     395        3885 :             while (boundary_with_punctuation > wBoundary.endPos && Text[--boundary_with_punctuation] == SPACE);
     396        3885 :             if (boundary_with_punctuation != 0) boundary_with_punctuation += 1 - wBoundary.endPos;
     397        3885 :             uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
     398       15540 :             aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
     399             :                         wBoundary.endPos - wBoundary.startPos), rLocale,
     400       11655 :                     (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos - ((hOptions.hyphenIndex == wBoundary.endPos)? nStartPosWordEnd : 0)), hOptions.aHyphenationOptions);
     401        3885 :             if (aHyphenatedWord.is()) {
     402           0 :                 lbr.rHyphenatedWord = aHyphenatedWord;
     403           0 :                 if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
     404           0 :                     lbr.breakIndex = -1;
     405             :                 else
     406           0 :                     lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
     407           0 :                 lbr.breakType = BreakType::HYPHENATION;
     408             : 
     409             :                 // check not optimal hyphenation of "word-word" (word with hyphens)
     410           0 :                 if (lbr.breakIndex > -1 && wBoundary.startPos + aHyphenatedWord->getHyphenationPos() < line.aBreakIterator->current()) {
     411           0 :                     lbr.breakIndex = line.aBreakIterator->current();
     412           0 :                     lbr.breakType = BreakType::WORDBOUNDARY;
     413             :                 }
     414             : 
     415             :             } else {
     416        3885 :                 lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     417        3885 :                 lbr.breakType = BreakType::WORDBOUNDARY;;
     418        3885 :             }
     419             :         } else { //word boundary break
     420       34174 :             lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     421       34174 :             lbr.breakType = BreakType::WORDBOUNDARY;
     422             :         }
     423             : 
     424             : #define WJ 0x2060   // Word Joiner
     425       42523 :         GlueSpace=false;
     426       42523 :         if (lbr.breakType == BreakType::WORDBOUNDARY) {
     427       42523 :             nStartPos = lbr.breakIndex;
     428       42523 :             if (Text[nStartPos--] == WJ)
     429           0 :                 GlueSpace=true;
     430      189093 :             while (nStartPos >= 0 &&
     431       92609 :                     (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
     432       23097 :                 if (Text[nStartPos--] == WJ)
     433           0 :                     GlueSpace=true;
     434             :             }
     435       42523 :             if (GlueSpace && nStartPos < 0)  {
     436           0 :                 lbr.breakIndex = 0;
     437           0 :                 break;
     438             :             }
     439             :         }
     440             :     }
     441             : 
     442       42523 :     return lbr;
     443             : }
     444             : 
     445             : OUString SAL_CALL
     446           0 : BreakIterator_Unicode::getImplementationName() throw( uno::RuntimeException, std::exception )
     447             : {
     448           0 :     return OUString::createFromAscii(cBreakIterator);
     449             : }
     450             : 
     451             : sal_Bool SAL_CALL
     452           0 : BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException, std::exception )
     453             : {
     454           0 :     return cppu::supportsService(this, rServiceName);
     455             : }
     456             : 
     457             : uno::Sequence< OUString > SAL_CALL
     458           0 : BreakIterator_Unicode::getSupportedServiceNames() throw( uno::RuntimeException, std::exception )
     459             : {
     460           0 :     uno::Sequence< OUString > aRet(1);
     461           0 :     aRet[0] = OUString::createFromAscii(cBreakIterator);
     462           0 :     return aRet;
     463             : }
     464             : 
     465             : } } } }
     466             : 
     467             : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
     468        4482 : com_sun_star_i18n_BreakIterator_Unicode_get_implementation(
     469             :     css::uno::XComponentContext *,
     470             :     css::uno::Sequence<css::uno::Any> const &)
     471             : {
     472        4482 :     return cppu::acquire(new css::i18n::BreakIterator_Unicode());
     473             : }
     474             : 
     475             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11