LCOV - code coverage report
Current view: top level - i18npool/source/breakiterator - breakiterator_unicode.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 209 254 82.3 %
Date: 2014-04-11 Functions: 17 20 85.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <breakiterator_unicode.hxx>
      21             : #include <cppuhelper/supportsservice.hxx>
      22             : #include <localedata.hxx>
      23             : #include <i18nlangtag/languagetag.hxx>
      24             : #include <i18nlangtag/languagetagicu.hxx>
      25             : #include <unicode/uchar.h>
      26             : #include <unicode/locid.h>
      27             : #include <unicode/rbbi.h>
      28             : #include <unicode/udata.h>
      29             : #include <rtl/strbuf.hxx>
      30             : #include <rtl/ustring.hxx>
      31             : #include <string.h>
      32             : 
      33             : U_CDECL_BEGIN
      34             : extern const char OpenOffice_dat[];
      35             : U_CDECL_END
      36             : 
      37             : using namespace ::com::sun::star;
      38             : using namespace ::com::sun::star::lang;
      39             : 
      40             : namespace com { namespace sun { namespace star { namespace i18n {
      41             : 
      42             : 
      43        2591 : BreakIterator_Unicode::BreakIterator_Unicode()
      44             :     : cBreakIterator( "com.sun.star.i18n.BreakIterator_Unicode" )    // implementation name
      45             :     , wordRule( "word" )
      46             :     , lineRule( "line" )
      47             :     , icuBI( NULL )
      48        2591 :     , aBreakType(0)
      49             : {
      50        2591 : }
      51             : 
      52        7334 : BreakIterator_Unicode::~BreakIterator_Unicode()
      53             : {
      54        2446 :     delete character.aBreakIterator;
      55        2446 :     delete sentence.aBreakIterator;
      56        2446 :     delete line.aBreakIterator;
      57       12230 :     for (size_t i = 0; i < SAL_N_ELEMENTS(words); i++)
      58        9784 :         delete words[i].aBreakIterator;
      59        4888 : }
      60             : 
      61             : /*
      62             :     Wrapper class to provide public access to the RuleBasedBreakIterator's
      63             :     setbreakType method.
      64             : */
      65        7832 : class OOoRuleBasedBreakIterator : public RuleBasedBreakIterator
      66             : {
      67             :     public:
      68        4174 :     inline void publicSetBreakType(int32_t type)
      69             :         {
      70        4174 :             setBreakType(type);
      71        4174 :         };
      72        4174 :     OOoRuleBasedBreakIterator(UDataMemory* image,
      73             :                               UErrorCode &status)
      74        4174 :         : RuleBasedBreakIterator(image, status)
      75        4174 :         { };
      76             : 
      77             : };
      78             : 
      79             : // loading ICU breakiterator on demand.
      80      401405 : void SAL_CALL BreakIterator_Unicode::loadICUBreakIterator(const com::sun::star::lang::Locale& rLocale,
      81             :         sal_Int16 rBreakType, sal_Int16 nWordType, const sal_Char *rule, const OUString& rText) throw(uno::RuntimeException)
      82             : {
      83      401405 :     sal_Bool newBreak = sal_False;
      84      401405 :     UErrorCode status = U_ZERO_ERROR;
      85      401405 :     sal_Int16 breakType = 0;
      86      401405 :     switch (rBreakType) {
      87      198629 :         case LOAD_CHARACTER_BREAKITERATOR: icuBI=&character; breakType = 3; break;
      88             :         case LOAD_WORD_BREAKITERATOR:
      89             :             assert (nWordType >= 0 && nWordType<= WordType::WORD_COUNT);
      90      118275 :             icuBI=&words[nWordType];
      91      118275 :             switch (nWordType) {
      92        1953 :                 case WordType::ANY_WORD: break; // odd but previous behavior
      93             :                 case WordType::ANYWORD_IGNOREWHITESPACES:
      94         929 :                     breakType = 0; rule = wordRule = "edit_word"; break;
      95             :                 case WordType::DICTIONARY_WORD:
      96      107004 :                     breakType = 1; rule = wordRule = "dict_word"; break;
      97             :                 default:
      98             :                 case WordType::WORD_COUNT:
      99        8389 :                     breakType = 2; rule = wordRule = "count_word"; break;
     100             :             }
     101      118275 :             break;
     102         467 :         case LOAD_SENTENCE_BREAKITERATOR: icuBI=&sentence; breakType = 5; break;
     103       84034 :         case LOAD_LINE_BREAKITERATOR: icuBI=&line; breakType = 4; break;
     104             :     }
     105     1201429 :     if (!icuBI->aBreakIterator ||
     106      795850 :         rLocale.Language != icuBI->maLocale.Language ||
     107     1195807 :         rLocale.Country  != icuBI->maLocale.Country  ||
     108      397171 :         rLocale.Variant  != icuBI->maLocale.Variant) {
     109        4234 :         if (icuBI->aBreakIterator) {
     110        1448 :             delete icuBI->aBreakIterator;
     111        1448 :             icuBI->aBreakIterator=NULL;
     112             :         }
     113        4234 :         if (rule) {
     114        4177 :             uno::Sequence< OUString > breakRules = LocaleDataImpl().getBreakIteratorRules(rLocale);
     115             : 
     116        4177 :             status = U_ZERO_ERROR;
     117        4177 :             udata_setAppData("OpenOffice", OpenOffice_dat, &status);
     118        4177 :             if ( !U_SUCCESS(status) ) throw uno::RuntimeException();
     119             : 
     120        4177 :             OOoRuleBasedBreakIterator *rbi = NULL;
     121             : 
     122        4177 :             if (breakRules.getLength() > breakType && !breakRules[breakType].isEmpty())
     123             :             {
     124             :                 rbi = new OOoRuleBasedBreakIterator(udata_open("OpenOffice", "brk",
     125           5 :                     OUStringToOString(breakRules[breakType], RTL_TEXTENCODING_ASCII_US).getStr(), &status), status);
     126             :             }
     127             :             //use icu's breakiterator for Thai, Khmer, Tibetan and Dzongkha
     128        4172 :             else if (rLocale.Language != "th" && rLocale.Language != "lo" && rLocale.Language != "km" && rLocale.Language != "bo" && rLocale.Language != "dz")
     129             :             {
     130        4169 :                 status = U_ZERO_ERROR;
     131        4169 :                 OStringBuffer aUDName(64);
     132        4169 :                 aUDName.append(rule);
     133        4169 :                 aUDName.append('_');
     134        4169 :                 aUDName.append( OUStringToOString(rLocale.Language, RTL_TEXTENCODING_ASCII_US));
     135        4169 :                 UDataMemory* pUData = udata_open("OpenOffice", "brk", aUDName.getStr(), &status);
     136        4169 :                 if( U_SUCCESS(status) )
     137           3 :                     rbi = new OOoRuleBasedBreakIterator( pUData, status);
     138        4169 :                 if (!U_SUCCESS(status) ) {
     139        4166 :                     status = U_ZERO_ERROR;
     140        4166 :                     pUData = udata_open("OpenOffice", "brk", rule, &status);
     141        4166 :                     if( U_SUCCESS(status) )
     142        4166 :                         rbi = new OOoRuleBasedBreakIterator( pUData, status);
     143        4166 :                     if (!U_SUCCESS(status) ) icuBI->aBreakIterator=NULL;
     144        4169 :                 }
     145             :             }
     146        4177 :             if (rbi) {
     147        4174 :                 switch (rBreakType) {
     148        1789 :                     case LOAD_CHARACTER_BREAKITERATOR: rbi->publicSetBreakType(UBRK_CHARACTER); break;
     149         699 :                     case LOAD_WORD_BREAKITERATOR: rbi->publicSetBreakType(UBRK_WORD); break;
     150          51 :                     case LOAD_SENTENCE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_SENTENCE); break;
     151        1635 :                     case LOAD_LINE_BREAKITERATOR: rbi->publicSetBreakType(UBRK_LINE); break;
     152             :                 }
     153        4174 :                 icuBI->aBreakIterator = rbi;
     154        4177 :             }
     155             :         }
     156             : 
     157        4234 :         if (!icuBI->aBreakIterator) {
     158          60 :             icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale)));
     159             : 
     160          60 :             status = U_ZERO_ERROR;
     161          60 :             switch (rBreakType) {
     162             :                 case LOAD_CHARACTER_BREAKITERATOR:
     163           0 :                     icuBI->aBreakIterator =  icu::BreakIterator::createCharacterInstance(icuLocale, status);
     164           0 :                     break;
     165             :                 case LOAD_WORD_BREAKITERATOR:
     166          60 :                     icuBI->aBreakIterator =  icu::BreakIterator::createWordInstance(icuLocale, status);
     167          60 :                     break;
     168             :                 case LOAD_SENTENCE_BREAKITERATOR:
     169           0 :                     icuBI->aBreakIterator = icu::BreakIterator::createSentenceInstance(icuLocale, status);
     170           0 :                     break;
     171             :                 case LOAD_LINE_BREAKITERATOR:
     172           0 :                     icuBI->aBreakIterator = icu::BreakIterator::createLineInstance(icuLocale, status);
     173           0 :                     break;
     174             :             }
     175          60 :             if ( !U_SUCCESS(status) ) {
     176           0 :                 icuBI->aBreakIterator=NULL;
     177           0 :                 throw uno::RuntimeException();
     178          60 :             }
     179             :         }
     180        4234 :         if (icuBI->aBreakIterator) {
     181        4234 :             icuBI->maLocale=rLocale;
     182        4234 :             newBreak=sal_True;
     183             :         } else {
     184           0 :             throw uno::RuntimeException();
     185             :         }
     186             :     }
     187             : 
     188      401405 :     if (newBreak || !icuBI->aICUText.equals(rText))
     189             :     {
     190             :         // UChar != sal_Unicode in MinGW
     191       48015 :         const UChar *pText = reinterpret_cast<const UChar *>(rText.getStr());
     192             : 
     193       48015 :         icuBI->ut = utext_openUChars(icuBI->ut, pText, rText.getLength(), &status);
     194             : 
     195       48015 :         if (!U_SUCCESS(status))
     196           0 :             throw uno::RuntimeException();
     197             : 
     198       48015 :         icuBI->aBreakIterator->setText(icuBI->ut, status);
     199             : 
     200       48015 :         if (!U_SUCCESS(status))
     201           0 :             throw uno::RuntimeException();
     202             : 
     203       48015 :         icuBI->aICUText = rText;
     204             :     }
     205      401405 : }
     206             : 
     207      244761 : sal_Int32 SAL_CALL BreakIterator_Unicode::nextCharacters( const OUString& Text,
     208             :         sal_Int32 nStartPos, const lang::Locale &rLocale,
     209             :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     210             :         throw(uno::RuntimeException, std::exception)
     211             : {
     212      244761 :     if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     213      196780 :         loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     214      393114 :         for (nDone = 0; nDone < nCount; nDone++) {
     215      196358 :             nStartPos = character.aBreakIterator->following(nStartPos);
     216      196358 :             if (nStartPos == BreakIterator::DONE)
     217          24 :                 return Text.getLength();
     218             :         }
     219             :     } else { // for CHARACTER mode
     220       95962 :         for (nDone = 0; nDone < nCount && nStartPos < Text.getLength(); nDone++)
     221       47981 :             Text.iterateCodePoints(&nStartPos, 1);
     222             :     }
     223      244737 :     return nStartPos;
     224             : }
     225             : 
     226       36430 : sal_Int32 SAL_CALL BreakIterator_Unicode::previousCharacters( const OUString& Text,
     227             :         sal_Int32 nStartPos, const lang::Locale& rLocale,
     228             :         sal_Int16 nCharacterIteratorMode, sal_Int32 nCount, sal_Int32& nDone )
     229             :         throw(uno::RuntimeException, std::exception)
     230             : {
     231       36430 :     if (nCharacterIteratorMode == CharacterIteratorMode::SKIPCELL ) { // for CELL mode
     232        1849 :         loadICUBreakIterator(rLocale, LOAD_CHARACTER_BREAKITERATOR, 0, "char", Text);
     233        3698 :         for (nDone = 0; nDone < nCount; nDone++) {
     234        1849 :             nStartPos = character.aBreakIterator->preceding(nStartPos);
     235        1849 :             if (nStartPos == BreakIterator::DONE)
     236           0 :                 return 0;
     237             :         }
     238             :     } else { // for BS to delete one char and CHARACTER mode.
     239       69162 :         for (nDone = 0; nDone < nCount && nStartPos > 0; nDone++)
     240       34581 :             Text.iterateCodePoints(&nStartPos, -1);
     241             :     }
     242       36430 :     return nStartPos;
     243             : }
     244             : 
     245             : 
     246        3201 : Boundary SAL_CALL BreakIterator_Unicode::nextWord( const OUString& Text, sal_Int32 nStartPos,
     247             :     const lang::Locale& rLocale, sal_Int16 rWordType ) throw(uno::RuntimeException, std::exception)
     248             : {
     249        3201 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     250             : 
     251        3201 :     result.startPos = icuBI->aBreakIterator->following(nStartPos);
     252        3201 :     if( result.startPos >= Text.getLength() || result.startPos == BreakIterator::DONE )
     253         174 :         result.endPos = result.startPos;
     254             :     else {
     255        5686 :         if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
     256        6054 :                     rWordType == WordType::DICTIONARY_WORD ) &&
     257        3027 :                 u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     258          24 :             result.startPos = icuBI->aBreakIterator->following(result.startPos);
     259             : 
     260        3027 :         result.endPos = icuBI->aBreakIterator->following(result.startPos);
     261        3027 :         if(result.endPos == BreakIterator::DONE)
     262           0 :             result.endPos = result.startPos;
     263             :     }
     264        3201 :     return result;
     265             : }
     266             : 
     267             : 
     268         431 : Boundary SAL_CALL BreakIterator_Unicode::previousWord(const OUString& Text, sal_Int32 nStartPos,
     269             :         const lang::Locale& rLocale, sal_Int16 rWordType) throw(uno::RuntimeException, std::exception)
     270             : {
     271         431 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     272             : 
     273         431 :     result.startPos = icuBI->aBreakIterator->preceding(nStartPos);
     274         431 :     if( result.startPos < 0 || result.startPos == BreakIterator::DONE)
     275           0 :         result.endPos = result.startPos;
     276             :     else {
     277         436 :         if ( (rWordType == WordType::ANYWORD_IGNOREWHITESPACES ||
     278         862 :                     rWordType == WordType::DICTIONARY_WORD) &&
     279         431 :                 u_isWhitespace(Text.iterateCodePoints(&result.startPos, 0)) )
     280           0 :             result.startPos = icuBI->aBreakIterator->preceding(result.startPos);
     281             : 
     282         431 :         result.endPos = icuBI->aBreakIterator->following(result.startPos);
     283         431 :         if(result.endPos == BreakIterator::DONE)
     284           0 :             result.endPos = result.startPos;
     285             :     }
     286         431 :     return result;
     287             : }
     288             : 
     289             : 
     290      114643 : Boundary SAL_CALL BreakIterator_Unicode::getWordBoundary( const OUString& Text, sal_Int32 nPos, const lang::Locale& rLocale,
     291             :         sal_Int16 rWordType, sal_Bool bDirection ) throw(uno::RuntimeException, std::exception)
     292             : {
     293      114643 :     loadICUBreakIterator(rLocale, LOAD_WORD_BREAKITERATOR, rWordType, NULL, Text);
     294      114643 :     sal_Int32 len = Text.getLength();
     295             : 
     296      114643 :     if(icuBI->aBreakIterator->isBoundary(nPos)) {
     297       47028 :         result.startPos = result.endPos = nPos;
     298       47028 :         if((bDirection || nPos == 0) && nPos < len) //forward
     299       46021 :             result.endPos = icuBI->aBreakIterator->following(nPos);
     300             :         else
     301        1007 :             result.startPos = icuBI->aBreakIterator->preceding(nPos);
     302             :     } else {
     303       67615 :         if(nPos <= 0) {
     304           0 :             result.startPos = 0;
     305           0 :             result.endPos = len ? icuBI->aBreakIterator->following((sal_Int32)0) : 0;
     306       67615 :         } else if(nPos >= len) {
     307           0 :             result.startPos = icuBI->aBreakIterator->preceding(len);
     308           0 :             result.endPos = len;
     309             :         } else {
     310       67615 :             result.startPos = icuBI->aBreakIterator->preceding(nPos);
     311       67615 :             result.endPos = icuBI->aBreakIterator->following(nPos);
     312             :         }
     313             :     }
     314      114643 :     if (result.startPos == BreakIterator::DONE)
     315           0 :         result.startPos = result.endPos;
     316      114643 :     else if (result.endPos == BreakIterator::DONE)
     317           0 :         result.endPos = result.startPos;
     318             : 
     319      114643 :     return result;
     320             : }
     321             : 
     322             : 
     323          17 : sal_Int32 SAL_CALL BreakIterator_Unicode::beginOfSentence( const OUString& Text, sal_Int32 nStartPos,
     324             :         const lang::Locale &rLocale ) throw(uno::RuntimeException, std::exception)
     325             : {
     326          17 :     loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     327             : 
     328          17 :     sal_Int32 len = Text.getLength();
     329          17 :     if (len > 0 && nStartPos == len)
     330           4 :         Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     331          17 :     if (!sentence.aBreakIterator->isBoundary(nStartPos))
     332          10 :         nStartPos = sentence.aBreakIterator->preceding(nStartPos);
     333             : 
     334             :     // skip preceding space.
     335          17 :     sal_uInt32 ch = Text.iterateCodePoints(&nStartPos, 1);
     336          17 :     while (nStartPos < len && u_isWhitespace(ch)) ch = Text.iterateCodePoints(&nStartPos, 1);
     337          17 :     Text.iterateCodePoints(&nStartPos, -1);
     338             : 
     339          17 :     return nStartPos;
     340             : }
     341             : 
     342         450 : sal_Int32 SAL_CALL BreakIterator_Unicode::endOfSentence( const OUString& Text, sal_Int32 nStartPos,
     343             :         const lang::Locale &rLocale ) throw(uno::RuntimeException, std::exception)
     344             : {
     345         450 :     loadICUBreakIterator(rLocale, LOAD_SENTENCE_BREAKITERATOR, 0, "sent", Text);
     346             : 
     347         450 :     sal_Int32 len = Text.getLength();
     348         450 :     if (len > 0 && nStartPos == len)
     349          16 :         Text.iterateCodePoints(&nStartPos, -1); // issue #i27703# treat end position as part of last sentence
     350         450 :     nStartPos = sentence.aBreakIterator->following(nStartPos);
     351             : 
     352         450 :     sal_Int32 nPos=nStartPos;
     353         450 :     while (nPos > 0 && u_isWhitespace(Text.iterateCodePoints(&nPos, -1))) nStartPos=nPos;
     354             : 
     355         450 :     return nStartPos;
     356             : }
     357             : 
     358       84034 : LineBreakResults SAL_CALL BreakIterator_Unicode::getLineBreak(
     359             :         const OUString& Text, sal_Int32 nStartPos,
     360             :         const lang::Locale& rLocale, sal_Int32 nMinBreakPos,
     361             :         const LineBreakHyphenationOptions& hOptions,
     362             :         const LineBreakUserOptions& /*rOptions*/ ) throw(uno::RuntimeException, std::exception)
     363             : {
     364       84034 :     LineBreakResults lbr;
     365             : 
     366       84034 :     if (nStartPos >= Text.getLength()) {
     367           0 :         lbr.breakIndex = Text.getLength();
     368           0 :         lbr.breakType = BreakType::WORDBOUNDARY;
     369           0 :         return lbr;
     370             :     }
     371             : 
     372       84034 :     loadICUBreakIterator(rLocale, LOAD_LINE_BREAKITERATOR, 0, lineRule, Text);
     373             : 
     374       84034 :     sal_Bool GlueSpace=sal_True;
     375      252102 :     while (GlueSpace) {
     376       84034 :         if (line.aBreakIterator->preceding(nStartPos + 1) == nStartPos) { //Line boundary break
     377       23533 :             lbr.breakIndex = nStartPos;
     378       23533 :             lbr.breakType = BreakType::WORDBOUNDARY;
     379       60501 :         } else if (hOptions.rHyphenator.is()) { //Hyphenation break
     380       34193 :             sal_Int32 boundary_with_punctuation = (line.aBreakIterator->next() != BreakIterator::DONE) ? line.aBreakIterator->current() : 0;
     381       34193 :             line.aBreakIterator->preceding(nStartPos + 1); // reset to check correct hyphenation of "word-word"
     382             : 
     383       34193 :             sal_Int32 nStartPosWordEnd = nStartPos;
     384       69833 :             while (line.aBreakIterator->current() < nStartPosWordEnd && u_ispunct((sal_uInt32)Text[nStartPosWordEnd])) // starting punctuation
     385        1447 :                 nStartPosWordEnd --;
     386             : 
     387             :             Boundary wBoundary = getWordBoundary( Text, nStartPosWordEnd, rLocale,
     388       34193 :                 WordType::DICTIONARY_WORD, false);
     389             : 
     390       34193 :             nStartPosWordEnd = wBoundary.endPos;
     391       69213 :             while (nStartPosWordEnd < Text.getLength() && (u_ispunct((sal_uInt32)Text[nStartPosWordEnd]))) // ending punctuation
     392         827 :                 nStartPosWordEnd ++;
     393       34193 :             nStartPosWordEnd = nStartPosWordEnd - wBoundary.endPos;
     394       34193 :             if (hOptions.hyphenIndex - wBoundary.startPos < nStartPosWordEnd) nStartPosWordEnd = hOptions.hyphenIndex - wBoundary.startPos;
     395             : #define SPACE 0x0020
     396       34193 :             while (boundary_with_punctuation > wBoundary.endPos && Text[--boundary_with_punctuation] == SPACE);
     397       34193 :             if (boundary_with_punctuation != 0) boundary_with_punctuation += 1 - wBoundary.endPos;
     398       34193 :             uno::Reference< linguistic2::XHyphenatedWord > aHyphenatedWord;
     399      102579 :             aHyphenatedWord = hOptions.rHyphenator->hyphenate(Text.copy(wBoundary.startPos,
     400             :                         wBoundary.endPos - wBoundary.startPos), rLocale,
     401       68386 :                     (sal_Int16) (hOptions.hyphenIndex - wBoundary.startPos - nStartPosWordEnd), hOptions.aHyphenationOptions);
     402       34193 :             if (aHyphenatedWord.is()) {
     403           0 :                 lbr.rHyphenatedWord = aHyphenatedWord;
     404           0 :                 if(wBoundary.startPos + aHyphenatedWord->getHyphenationPos() + 1 < nMinBreakPos )
     405           0 :                     lbr.breakIndex = -1;
     406             :                 else
     407           0 :                     lbr.breakIndex = wBoundary.startPos; //aHyphenatedWord->getHyphenationPos();
     408           0 :                 lbr.breakType = BreakType::HYPHENATION;
     409             : 
     410             :                 // check not optimal hyphenation of "word-word" (word with hyphens)
     411           0 :                 if (lbr.breakIndex > -1 && wBoundary.startPos + aHyphenatedWord->getHyphenationPos() < line.aBreakIterator->current()) {
     412           0 :                     lbr.breakIndex = line.aBreakIterator->current();
     413           0 :                     lbr.breakType = BreakType::WORDBOUNDARY;
     414             :                 }
     415             : 
     416             :             } else {
     417       34193 :                 lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     418       34193 :                 lbr.breakType = BreakType::WORDBOUNDARY;;
     419       34193 :             }
     420             :         } else { //word boundary break
     421       26308 :             lbr.breakIndex = line.aBreakIterator->preceding(nStartPos);
     422       26308 :             lbr.breakType = BreakType::WORDBOUNDARY;
     423             :         }
     424             : 
     425             : #define WJ 0x2060   // Word Joiner
     426       84034 :         GlueSpace=sal_False;
     427       84034 :         if (lbr.breakType == BreakType::WORDBOUNDARY) {
     428       84034 :             nStartPos = lbr.breakIndex;
     429       84034 :             if (Text[nStartPos--] == WJ)
     430           0 :                 GlueSpace=sal_True;
     431      306508 :             while (nStartPos >= 0 &&
     432      111915 :                     (u_isWhitespace(Text.iterateCodePoints(&nStartPos, 0)) || Text[nStartPos] == WJ)) {
     433       32993 :                 if (Text[nStartPos--] == WJ)
     434           0 :                     GlueSpace=sal_True;
     435             :             }
     436       84034 :             if (GlueSpace && nStartPos < 0)  {
     437           0 :                 lbr.breakIndex = 0;
     438           0 :                 break;
     439             :             }
     440             :         }
     441             :     }
     442             : 
     443       84034 :     return lbr;
     444             : }
     445             : 
     446             : OUString SAL_CALL
     447           0 : BreakIterator_Unicode::getImplementationName(void) throw( uno::RuntimeException, std::exception )
     448             : {
     449           0 :     return OUString::createFromAscii(cBreakIterator);
     450             : }
     451             : 
     452             : sal_Bool SAL_CALL
     453           0 : BreakIterator_Unicode::supportsService(const OUString& rServiceName) throw( uno::RuntimeException, std::exception )
     454             : {
     455           0 :     return cppu::supportsService(this, rServiceName);
     456             : }
     457             : 
     458             : uno::Sequence< OUString > SAL_CALL
     459           0 : BreakIterator_Unicode::getSupportedServiceNames(void) throw( uno::RuntimeException, std::exception )
     460             : {
     461           0 :     uno::Sequence< OUString > aRet(1);
     462           0 :     aRet[0] = OUString::createFromAscii(cBreakIterator);
     463           0 :     return aRet;
     464             : }
     465             : 
     466             : } } } }
     467             : 
     468             : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
     469        2587 : com_sun_star_i18n_BreakIterator_Unicode_get_implementation(
     470             :     css::uno::XComponentContext *,
     471             :     css::uno::Sequence<css::uno::Any> const &)
     472             : {
     473        2587 :     return cppu::acquire(new css::i18n::BreakIterator_Unicode());
     474             : }
     475             : 
     476             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10