LCOV - code coverage report
Current view: top level - i18npool/source/breakiterator - breakiterator_unicode.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 209 254 82.3 %
Date: 2014-11-03 Functions: 17 20 85.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <breakiterator_unicode.hxx>
      21             : #include <cppuhelper/supportsservice.hxx>
      22             : #include <localedata.hxx>
      23             : #include <i18nlangtag/languagetag.hxx>
      24             : #include <i18nlangtag/languagetagicu.hxx>
      25             : #include <unicode/uchar.h>
      26             : #include <unicode/locid.h>
      27             : #include <unicode/rbbi.h>
      28             : #include <unicode/udata.h>
      29             : #include <rtl/strbuf.hxx>
      30             : #include <rtl/ustring.hxx>
      31             : #include <string.h>
      32             : 
      33             : U_CDECL_BEGIN
      34             : extern const char OpenOffice_dat[];
      35             : U_CDECL_END
      36             : 
      37             : using namespace ::com::sun::star;
      38             : using namespace ::com::sun::star::lang;
      39             : 
      40             : namespace com { namespace sun { namespace star { namespace i18n {
      41             : 
      42             : 
                       // Default constructor: records the implementation name string, selects
                       // "line" as the rule name that getLineBreak() hands to
                       // loadICUBreakIterator(), and starts with no active iterator slot
                       // (icuBI == NULL) and aBreakType 0.
      43       17589 : BreakIterator_Unicode::BreakIterator_Unicode()
      44             :     : cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" )    // implementation name
      45             :     , lineRule( "line" )
      46             :     , icuBI( NULL )
      47       17589 :     , aBreakType(0)
      48             : {
      49       17589 : }
      50             : 
                       // Destroys the ICU break iterators owned by the character, sentence and
                       // line slots and by every entry of the words array.
                       // NOTE(review): the UText handles stored in the slots (ut) are not closed
                       // here; presumably the slot type releases them itself -- confirm in the header.
      51       52516 : BreakIterator_Unicode::~BreakIterator_Unicode()
      52             : {
      53       17540 :     delete character.aBreakIterator;
      54       17540 :     delete sentence.aBreakIterator;
      55       17540 :     delete line.aBreakIterator;
      56       87700 :     for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
      57       70160 :         delete words[i].aBreakIterator;
      58       34976 : }
      59             : 
      60             : /*
      61             :     Wrapper class to provide public access to the RuleBasedBreakIterator's
      62             :     setbreakType method.
      63             : */
      64       36840 : class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator
      65             : {
      66             :     public:
                           // Public forwarder: hands type to the inherited setBreakType().
      67       18516 :     inline void publicSetBreakType(int32_t type)
      68             :         {
      69       18516 :             setBreakType(type);
      70       18516 :         };
                           // Builds the iterator from an already opened rule-data image. Both
                           // arguments go straight to the base-class constructor; status is taken by
                           // reference so the caller can inspect it after construction.
      71       18516 :     OOoRuleBasedBreakIterator(UDataMemory* image,
      72             :                               UErrorCode &status)
      73       18516 :         : RuleBasedBreakIterator(image, status)
      74       18516 :         { };
      75             : 
      76             : };
      77             : 
      78             : // loading ICU breakiterator on demand.
      79      641354 : void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
      80             :         sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
      81             : {
      82      641354 :     bool newBreak = false;
      83      641354 :     UErrorCode status = U_ZERO_ERROR;
      84      641354 :     sal_Int16 breakType = 0;
      85      641354 :     switch (rBreakType) {
      86      330092 :         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
      87             :         case LOAD_WORD_BREAKITERATOR:
      88             :             assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
      89      151826 :             icuBI=&words[nWordType];
      90      151826 :             switch (nWordType) {
      91        3784 :                 case WordType::ANY_WORD: break; // odd but previous behavior
      92             :                 case WordType::ANYWORD_IGNOREWHITESPACES:
      93        1860 :                     breakType = 0; rule = "edit_word"; break;
      94             :                 case WordType::DICTIONARY_WORD:
      95      133274 :                     breakType = 1; rule = "dict_word"; break;
      96             :                 default:
      97             :                 case WordType::WORD_COUNT:
      98       12908 :                     breakType = 2; rule = "count_word"; break;
      99             :             }
     100      151826 :             break;
     101        1120 :         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
     102      158316 :         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
     103             :     }
     104     1906120 :     if (!icuBI->aBreakIterator ||
     105     1246390 :         rLocale.Language != icuBI->maLocale.Language ||
     106     1887040 :         rLocale.Country  != icuBI->maLocale.Country  ||
     107      622708 :         rLocale.Variant  != icuBI->maLocale.Variant) {
     108       18646 :         if (icuBI->aBreakIterator) {
     109         704 :             delete icuBI->aBreakIterator;
     110         704 :             icuBI->aBreakIterator=NULL;
     111             :         }
     112       18646 :         if (rule) {
     113       18524 :             uno::Sequence< OUString > breakRules = LocaleDataImpl().getBreakIteratorRules(rLocale);
     114             : 
     115       18524 :             status = U_ZERO_ERROR;
     116       18524 :             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
     117       18524 :             if ( !U_SUCCESS(status) ) throw uno::RuntimeException();
     118             : 
     119       18524 :             OOoRuleBasedBreakIterator *rbi = NULL;
     120             : 
     121       18524 :             if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty())
     122             :             {
     123             :                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
     124          10 :                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
     125             :             }
     126             :             //use icu's breakiterator for Thai, Khmer, Tibetan and Dzongkha
     127       18514 :             else if (rLocale.Language != "th" && rLocale.Language != "lo" && rLocale.Language != "km" && rLocale.Language != "bo" && rLocale.Language != "dz")
     128             :             {
     129       18506 :                 status = U_ZERO_ERROR;
     130       18506 :                 OStringBuffer aUDName(64);
     131       18506 :                 aUDName.append(rule);
     132       18506 :                 aUDName.append('_');
     133       18506 :                 aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
     134       18506 :                 UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
     135       18506 :                 if( U_SUCCESS(status) )
     136          10 :                     rbi = new OOoRuleBasedBreakIterator( pUData, status);
     137       18506 :                 if (!U_SUCCESS(status) ) {
     138       18496 :                     status = U_ZERO_ERROR;
     139       18496 :                     pUData = udata_open("OpenOffice", "brk", rule, &status);
     140       18496 :                     if( U_SUCCESS(status) )
     141       18496 :                         rbi = new OOoRuleBasedBreakIterator( pUData, status);
     142       18496 :                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
     143       18506 :                 }
     144             :             }
     145       18524 :             if (rbi) {
     146       18516 :                 switch (rBreakType) {
     147        2170 :                     case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
     148        1510 :                     case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
     149         112 :                     case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
     150       14724 :                     case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
     151             :                 }
     152       18516 :                 icuBI->aBreakIterator = rbi;
     153       18524 :             }
     154             :         }
     155             : 
     156       18646 :         if (!icuBI->aBreakIterator) {
     157         130 :             icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale)));
     158             : 
     159         130 :             status = U_ZERO_ERROR;
     160         130 :             switch (rBreakType) {
     161             :                 case LOAD_CHARACTER_BREAKITERATOR:
     162           0 :                     icuBI->aBreakIterator =  icu::BreakIterator::createCharacterInstance(icuLocale, status);
     163           0 :                     break;
     164             :                 case LOAD_WORD_BREAKITERATOR:
     165         130 :                     icuBI->aBreakIterator =  icu::BreakIterator::createWordInstance(icuLocale, status);
     166         130 :                     break;
     167             :                 case LOAD_SENTENCE_BREAKITERATOR:
     168           0 :                     icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
     169           0 :                     break;
     170             :                 case LOAD_LINE_BREAKITERATOR:
     171           0 :                     icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
     172           0 :                     break;
     173             :             }
     174         130 :             if ( !U_SUCCESS(status) ) {
     175           0 :                 icuBI->aBreakIterator=NULL;
     176           0 :                 throw uno::RuntimeException();
     177         130 :             }
     178             :         }
     179       18646 :         if (icuBI->aBreakIterator) {
     180       18646 :             icuBI->maLocale=rLocale;
     181       18646 :             newBreak=true;
     182             :         } else {
     183           0 :             throw uno::RuntimeException();
     184             :         }
     185             :     }
     186             : 
     187      641354 :     if (newBreak || icuBI->aICUText.pData != rText.pData)
     188             :     {
     189             :         // UChar != sal_Unicode in MinGW
     190       82488 :         const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
     191             : 
     192       82488 :         icuBI->ut = utext_openUChars(icuBI->ut, pText, rText.getLength(), &status);
     193             : 
     194       82488 :         if (!U_SUCCESS(status))
     195           0 :             throw uno::RuntimeException();
     196             : 
     197       82488 :         icuBI->aBreakIterator->setText(icuBI->ut, status);
     198             : 
     199       82488 :         if (!U_SUCCESS(status))
     200           0 :             throw uno::RuntimeException();
     201             : 
     202       82488 :         icuBI->aICUText = rText;
     203             :     }
     204      641354 : }
     205             : 
                       // Moves forward from nStartPos by up to nCount steps and returns the new
                       // position; nDone receives the number of steps actually taken.
                       //
                       // In SKIPCELL mode a step is one boundary of the ICU character iterator
                       // (loaded with the "char" rule); when the iterator reports DONE the text
                       // length is returned and the failed step is not counted in nDone.
                       // In every other mode a step is one code point, and stepping stops at the
                       // end of Text.
     206      424282 : sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
     207             :         sal_Int32 nStartPos, const lang::Locale &rLocale,
     208             :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     209             :         throw(uno::RuntimeException, std::exception)
     210             : {
     211      424282 :     if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     212      327728 :         loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     213      654396 :         for (nDone = 0; nDone < nCount; nDone++) {
     214      326720 :             nStartPos = character.aBreakIterator->following(nStartPos);
     215      326720 :             if (nStartPos == BreakIterator::DONE)
     216          52 :                 return Text.getLength();
     217             :         }
     218             :     } else { // for CHARACTER mode
     219      193108 :         for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
     220       96554 :             Text.iterateCodePoints(&nStartPos, 1);
     221             :     }
     222      424230 :     return nStartPos;
     223             : }
     224             : 
                       // Moves backward from nStartPos by up to nCount steps and returns the new
                       // position; nDone receives the number of steps actually taken.
                       //
                       // In SKIPCELL mode a step is one boundary of the ICU character iterator
                       // (loaded with the "char" rule); when the iterator reports DONE, 0 is
                       // returned and the failed step is not counted in nDone.
                       // In every other mode a step is one code point, and stepping stops at
                       // position 0.
     225       72358 : sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
     226             :         sal_Int32 nStartPos, const lang::Locale& rLocale,
     227             :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     228             :         throw(uno::RuntimeException, std::exception)
     229             : {
     230       72358 :     if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     231        2364 :         loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     232        4728 :         for (nDone = 0; nDone < nCount; nDone++) {
     233        2364 :             nStartPos = character.aBreakIterator->preceding(nStartPos);
     234        2364 :             if (nStartPos == BreakIterator::DONE)
     235           0 :                 return 0;
     236             :         }
     237             :     } else { // for BS to delete one char and CHARACTER mode.
     238      139988 :         for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
     239       69994 :             Text.iterateCodePoints(&nStartPos, -1);
     240             :     }
     241       72358 :     return nStartPos;
     242             : }
     243             : 
     244             : 
                       // Returns the boundary of the word that follows nStartPos.
                       //
                       // The word iterator for rWordType is loaded with a NULL rule, so
                       // loadICUBreakIterator() picks the rule from the word type. The start is the
                       // first iterator boundary after nStartPos; when that is at or past the end
                       // of Text, or DONE, start and end are the same value. For
                       // ANYWORD_IGNOREWHITESPACES and DICTIONARY_WORD a start that sits on
                       // whitespace is advanced to the next boundary. The end is the boundary after
                       // the start, or the start itself when the iterator reports DONE.
                       // The value is built in the member `result` and returned by copy.
     245        4687 : Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
     246             :     const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException, std::exception)
     247             : {
     248        4687 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     249             : 
     250        4687 :     result.startPos = icuBI->aBreakIterator->following(nStartPos);
     251        4687 :     if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
     252         171 :         result.endPos = result.startPos;
     253             :     else {
                               // iterateCodePoints(.., 0) only reads the code point at startPos
     254        8296 :         if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
     255        9032 :                     rWordType == WordType::DICTIONARY_WORD ) &&
     256        4516 :                 u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     257          95 :             result.startPos = icuBI->aBreakIterator->following(result.startPos);
     258             : 
     259        4516 :         result.endPos = icuBI->aBreakIterator->following(result.startPos);
     260        4516 :         if(result.endPos == BreakIterator::DONE)
     261           0 :             result.endPos = result.startPos;
     262             :     }
     263        4687 :     return result;
     264             : }
     265             : 
     266             : 
                       // Returns the boundary of the word that precedes nStartPos.
                       //
                       // The word iterator for rWordType is loaded with a NULL rule, so
                       // loadICUBreakIterator() picks the rule from the word type. The start is the
                       // last iterator boundary before nStartPos; when that is negative (which
                       // includes DONE as compared here) start and end are the same value. For
                       // ANYWORD_IGNOREWHITESPACES and DICTIONARY_WORD a start that sits on
                       // whitespace is moved back one more boundary. The end is the boundary after
                       // the start, or the start itself when the iterator reports DONE.
                       // The value is built in the member `result` and returned by copy.
     267         862 : Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
     268             :         const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException, std::exception)
     269             : {
     270         862 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     271             : 
     272         862 :     result.startPos = icuBI->aBreakIterator->preceding(nStartPos);
     273         862 :     if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
     274           0 :         result.endPos = result.startPos;
     275             :     else {
                               // iterateCodePoints(.., 0) only reads the code point at startPos
     276         872 :         if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
     277        1724 :                     rWordType == WordType::DICTIONARY_WORD) &&
     278         862 :                 u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     279           0 :             result.startPos = icuBI->aBreakIterator->preceding(result.startPos);
     280             : 
     281         862 :         result.endPos = icuBI->aBreakIterator->following(result.startPos);
     282         862 :         if(result.endPos == BreakIterator::DONE)
     283           0 :             result.endPos = result.startPos;
     284             :     }
     285         862 :     return result;
     286             : }
     287             : 
     288             : 
                       // Returns the boundary of the word around nPos for the given word type.
                       //
                       // When nPos is itself an iterator boundary, bDirection decides which side is
                       // reported: forward (also forced at position 0) extends the end to the next
                       // boundary as long as nPos is before the end of Text, otherwise the start is
                       // moved to the previous boundary. When nPos is not a boundary, the start is
                       // the boundary before it and the end the boundary after it, with nPos
                       // clamped to the range [0, length]. A side for which the iterator reports
                       // DONE is replaced by the other side.
                       // The value is built in the member `result` and returned by copy.
     289      146277 : Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
     290             :         sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException, std::exception)
     291             : {
     292      146277 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     293      146277 :     sal_Int32 len = Text.getLength();
     294             : 
     295      146277 :     if(icuBI->aBreakIterator->isBoundary(nPos)) {
     296       58271 :         result.startPos = result.endPos = nPos;
     297       58271 :         if((bDirection || nPos == 0) && nPos < len) //forward
     298       56349 :             result.endPos = icuBI->aBreakIterator->following(nPos);
     299             :         else
     300        1922 :             result.startPos = icuBI->aBreakIterator->preceding(nPos);
     301             :     } else {
     302       88006 :         if(nPos <= 0) {
     303           0 :             result.startPos = 0;
     304           0 :             result.endPos = len ? icuBI->aBreakIterator->following((sal_Int32)0) : 0;
     305       88006 :         } else if(nPos >= len) {
     306           0 :             result.startPos = icuBI->aBreakIterator->preceding(len);
     307           0 :             result.endPos = len;
     308             :         } else {
     309       88006 :             result.startPos = icuBI->aBreakIterator->preceding(nPos);
     310       88006 :             result.endPos = icuBI->aBreakIterator->following(nPos);
     311             :         }
     312             :     }
                           // collapse onto the valid side when the iterator ran off the text
     313      146277 :     if (result.startPos == BreakIterator::DONE)
     314           0 :         result.startPos = result.endPos;
     315      146277 :     else if (result.endPos == BreakIterator::DONE)
     316           0 :         result.endPos = result.startPos;
     317             : 
     318      146277 :     return result;
     319             : }
     320             : 
     321             : 
                       // Returns the position at which the sentence containing nStartPos starts,
                       // with leading whitespace of that sentence skipped.
                       //
                       // A position equal to the text length is first moved back one code point so
                       // that it counts as part of the last sentence. If the position is not a
                       // sentence boundary it is replaced by the preceding boundary. From there
                       // code points are read forward while they are whitespace, and the position
                       // is finally stepped back by one code point onto the last one read.
     322          34 : sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
     323             :         const lang::Locale &rLocale ) throw(uno::RuntimeException, std::exception)
     324             : {
     325          34 :     loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     326             : 
     327          34 :     sal_Int32 len = Text.getLength();
     328          34 :     if (len > 0 && nStartPos == len)
     329           8 :         Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     330          34 :     if (!sentence.aBreakIterator->isBoundary(nStartPos))
     331          20 :         nStartPos = sentence.aBreakIterator->preceding(nStartPos);
     332             : 
     333             :     // skip preceding space.
     334          34 :     sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
     335          34 :     while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
     336          34 :     Text.iterateCodePoints(&nStartPos, -1);
     337             : 
     338          34 :     return nStartPos;
     339             : }
     340             : 
                       // Returns the position at which the sentence containing nStartPos ends,
                       // with trailing whitespace of that sentence excluded.
                       //
                       // A position equal to the text length is first moved back one code point so
                       // that it counts as part of the last sentence. The sentence iterator's next
                       // boundary after that position is taken, and the result is then pulled back
                       // over every whitespace code point directly in front of it.
     341        1086 : sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
     342             :         const lang::Locale &rLocale ) throw(uno::RuntimeException, std::exception)
     343             : {
     344        1086 :     loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     345             : 
     346        1086 :     sal_Int32 len = Text.getLength();
     347        1086 :     if (len > 0 && nStartPos == len)
     348          54 :         Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     349        1086 :     nStartPos = sentence.aBreakIterator->following(nStartPos);
     350             : 
                           // nPos walks backwards; nStartPos follows it only across whitespace
     351        1086 :     sal_Int32 nPos=nStartPos;
     352        1086 :     while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
     353             : 
     354        1086 :     return nStartPos;
     355             : }
     356             : 
     357      158316 : LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
     358             :         const OUString& Text, sal_Int32 nStartPos,
     359             :         const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
     360             :         const LineBreakHyphenationOptions& hOptions,
     361             :         const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException, std::exception)
     362             : {
     363      158316 :     LineBreakResults lbr;
     364             : 
     365      158316 :     if (nStartPos >= Text.getLength()) {
     366           0 :         lbr.breakIndex = Text.getLength();
     367           0 :         lbr.breakType = BreakType::WORDBOUNDARY;
     368           0 :         return lbr;
     369             :     }
     370             : 
     371      158316 :     loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
     372             : 
     373      158316 :     bool GlueSpace=true;
     374      474948 :     while (GlueSpace) {
     375      158316 :         if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
     376       42149 :             lbr.breakIndex = nStartPos;
     377       42149 :             lbr.breakType = BreakType::WORDBOUNDARY;
     378      116167 :         } else if (hOptions.rHyphenator.is()) { //Hyphenation break
     379       45487 :             sal_Int32 boundary_with_punctuation = (line.aBreakIterator->next() != BreakIterator::DONE) ? line.aBreakIterator->current() : 0;
     380       45487 :             line.aBreakIterator->preceding(nStartPos + 1); // reset to check correct hyphenation of "word-word"
     381             : 
     382       45487 :             sal_Int32 nStartPosWordEnd = nStartPos;
     383       94168 :             while (line.aBreakIterator->current() < nStartPosWordEnd && u_ispunct((sal_uInt32)Text[nStartPosWordEnd])) // starting punctuation
     384        3194 :                 nStartPosWordEnd --;
     385             : 
     386             :             Boundary wBoundary = getWordBoundary( Text, nStartPosWordEnd, rLocale,
     387       45487 :                 WordType::DICTIONARY_WORD, false);
     388             : 
     389       45487 :             nStartPosWordEnd = wBoundary.endPos;
     390       92342 :             while (nStartPosWordEnd < Text.getLength() && (u_ispunct((sal_uInt32)Text[nStartPosWordEnd]))) // ending punctuation
     391        1368 :                 nStartPosWordEnd ++;
     392       45487 :             nStartPosWordEnd = nStartPosWordEnd - wBoundary.endPos;
     393       45487 :             if (hOptions.hyphenIndex - wBoundary.startPos < nStartPosWordEnd) nStartPosWordEnd = hOptions.hyphenIndex - wBoundary.startPos;
     394             : #define SPACE 0x0020
     395       45487 :             while (boundary_with_punctuation > wBoundary.endPos && Text[--boundary_with_punctuation] == SPACE);
     396       45487 :             if (boundary_with_punctuation != 0) boundary_with_punctuation += 1 - wBoundary.endPos;
     397       45487 :             uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
     398      181948 :             aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
     399             :                         wBoundary.endPos - wBoundary.startPos), rLocale,
     400      136461 :                     (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos - ((hOptions.hyphenIndex == wBoundary.endPos)? nStartPosWordEnd : 0)), hOptions.aHyphenationOptions);
     401       45487 :             if (aHyphenatedWord.is()) {
     402           0 :                 lbr.rHyphenatedWord = aHyphenatedWord;
     403           0 :                 if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
     404           0 :                     lbr.breakIndex = -1;
     405             :                 else
     406           0 :                     lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
     407           0 :                 lbr.breakType = BreakType::HYPHENATION;
     408             : 
     409             :                 // check not optimal hyphenation of "word-word" (word with hyphens)
     410           0 :                 if (lbr.breakIndex > -1 && wBoundary.startPos + aHyphenatedWord->getHyphenationPos() < line.aBreakIterator->current()) {
     411           0 :                     lbr.breakIndex = line.aBreakIterator->current();
     412           0 :                     lbr.breakType = BreakType::WORDBOUNDARY;
     413             :                 }
     414             : 
     415             :             } else {
     416       45487 :                 lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     417       45487 :                 lbr.breakType = BreakType::WORDBOUNDARY;;
     418       45487 :             }
     419             :         } else { //word boundary break
     420       70680 :             lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     421       70680 :             lbr.breakType = BreakType::WORDBOUNDARY;
     422             :         }
     423             : 
     424             : #define WJ 0x2060   // Word Joiner
     425      158316 :         GlueSpace=false;
     426      158316 :         if (lbr.breakType == BreakType::WORDBOUNDARY) {
     427      158316 :             nStartPos = lbr.breakIndex;
     428      158316 :             if (Text[nStartPos--] == WJ)
     429           0 :                 GlueSpace=true;
     430      695589 :             while (nStartPos >= 0 &&
     431      305469 :                     (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
     432       90489 :                 if (Text[nStartPos--] == WJ)
     433           0 :                     GlueSpace=true;
     434             :             }
     435      158316 :             if (GlueSpace && nStartPos < 0)  {
     436           0 :                 lbr.breakIndex = 0;
     437           0 :                 break;
     438             :             }
     439             :         }
     440             :     }
     441             : 
     442      158316 :     return lbr;
     443             : }
     444             : 
                       // Returns the implementation name held in cBreakIterator as an OUString.
     445             : OUString SAL_CALL
     446           0 : BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException, std::exception )
     447             : {
     448           0 :     return OUString::createFromAscii(cBreakIterator);
     449             : }
     450             : 
                       // Reports whether rServiceName is supported; the decision is delegated to
                       // cppu::supportsService(), which is given this object and the name.
     451             : sal_Bool SAL_CALL
     452           0 : BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException, std::exception )
     453             : {
     454           0 :     return cppu::supportsService(this, rServiceName);
     455             : }
     456             : 
                       // Returns a one-element sequence whose single entry is the string held in
                       // cBreakIterator (the same string getImplementationName() returns).
     457             : uno::Sequence< OUString > SAL_CALL
     458           0 : BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException, std::exception )
     459             : {
     460           0 :     uno::Sequence< OUString > aRet(1);
     461           0 :     aRet[0] = OUString::createFromAscii(cBreakIterator);
     462           0 :     return aRet;
     463             : }
     464             : 
     465             : } } } }
     466             : 
                       // Exported C entry point that creates a new BreakIterator_Unicode and
                       // returns it after passing it through cppu::acquire(). Neither the component
                       // context nor the argument sequence is used.
     467             : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
     468       17485 : com_sun_star_i18n_BreakIterator_Unicode_get_implementation(
     469             :     css::uno::XComponentContext *,
     470             :     css::uno::Sequence<css::uno::Any> const &)
     471             : {
     472       17485 :     return cppu::acquire(new css::i18n::BreakIterator_Unicode());
     473             : }
     474             : 
     475             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10