LCOV - code coverage report
Current view: top level - lingucomponent/source/hyphenator/hyphen - hyphenimp.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 213 406 52.5 %
Date: 2014-11-03 Functions: 15 25 60.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #if defined(WNT)
      21             : #include <prewin.h>
      22             : #include <postwin.h>
      23             : #endif
      24             : 
      25             : #include <com/sun/star/uno/Reference.h>
      26             : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
      27             : 
      28             : #include <cppuhelper/factory.hxx>
      29             : #include <cppuhelper/supportsservice.hxx>
      30             : #include <com/sun/star/registry/XRegistryKey.hpp>
      31             : #include <i18nlangtag/languagetag.hxx>
      32             : #include <tools/debug.hxx>
      33             : #include <osl/mutex.hxx>
      34             : 
      35             : #include <hyphen.h>
      36             : #include <hyphenimp.hxx>
      37             : 
      38             : #include <linguistic/hyphdta.hxx>
      39             : #include <rtl/ustring.hxx>
      40             : #include <rtl/ustrbuf.hxx>
      41             : #include <rtl/textenc.h>
      42             : 
      43             : #include <linguistic/lngprops.hxx>
      44             : #include <linguistic/misc.hxx>
      45             : #include <unotools/pathoptions.hxx>
      46             : #include <unotools/useroptions.hxx>
      47             : #include <unotools/lingucfg.hxx>
      48             : #include <osl/file.hxx>
      49             : 
      50             : #include <stdio.h>
      51             : #include <string.h>
      52             : 
      53             : #include <list>
      54             : #include <set>
      55             : #include <boost/scoped_array.hpp>
      56             : 
      57             : using namespace utl;
      58             : using namespace osl;
      59             : using namespace com::sun::star;
      60             : using namespace com::sun::star::beans;
      61             : using namespace com::sun::star::lang;
      62             : using namespace com::sun::star::uno;
      63             : using namespace com::sun::star::linguistic2;
      64             : using namespace linguistic;
      65             : 
      // Max(a, b): yields the larger of the two arguments.
      // Both arguments are fully parenthesized so that operands containing
      // operators of lower precedence than '>' or '?:' expand correctly.
      // Each argument may be evaluated twice; do not pass expressions with
      // side effects.
      #define Max(a,b) ((a) > (b) ? (a) : (b))
      68             : 
      69          44 : Hyphenator::Hyphenator() :
      70          44 :     aEvtListeners   ( GetLinguMutex() )
      71             : {
      72          44 :     bDisposing = false;
      73          44 :     pPropHelper = NULL;
      74          44 :     aDicts = NULL;
      75          44 :     numdict = 0;
      76          44 : }
      77             : 
      78         126 : Hyphenator::~Hyphenator()
      79             : {
      80          42 :     if (numdict && aDicts)
      81             :     {
      82        1008 :         for (int i=0; i < numdict; ++i)
      83             :         {
      84         966 :             delete aDicts[i].apCC;
      85         966 :             if (aDicts[i].aPtr)
      86           8 :                 hnj_hyphen_free(aDicts[i].aPtr);
      87             :         }
      88             :     }
      89          42 :     delete[] aDicts;
      90             : 
      91          42 :     if (pPropHelper)
      92             :     {
      93           0 :         pPropHelper->RemoveAsPropListener();
      94           0 :         delete pPropHelper;
      95             :     }
      96          84 : }
      97             : 
      98          10 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
      99             : {
     100          10 :     if (!pPropHelper)
     101             :     {
     102          10 :         Reference< XLinguProperties >   xPropSet( GetLinguProperties(), UNO_QUERY );
     103             : 
     104          10 :         pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
     105          10 :         pPropHelper->AddAsPropListener();   //! after a reference is established
     106             :     }
     107          10 :     return *pPropHelper;
     108             : }
     109             : 
     110          88 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
     111             :         throw(RuntimeException, std::exception)
     112             : {
     113          88 :     MutexGuard  aGuard( GetLinguMutex() );
     114             : 
     115             :     // this routine should return the locales supported by the installed
     116             :     // dictionaries.
     117          88 :     if (!numdict)
     118             :     {
     119          44 :         SvtLinguConfig aLinguCfg;
     120             : 
     121             :         // get list of dictionaries-to-use
     122             :         // (or better speaking: the list of dictionaries using the
     123             :         // new configuration entries).
     124          88 :         std::list< SvtLinguConfigDictionaryEntry > aDics;
     125          88 :         uno::Sequence< OUString > aFormatList;
     126             :         aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators",
     127          44 :                 "org.openoffice.lingu.LibHnjHyphenator", aFormatList );
     128          44 :         sal_Int32 nLen = aFormatList.getLength();
     129          88 :         for (sal_Int32 i = 0;  i < nLen;  ++i)
     130             :         {
     131             :             std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
     132          44 :                     aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
     133          44 :             aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
     134          44 :         }
     135             : 
     136             :         //!! for compatibility with old dictionaries (the ones not using extensions
     137             :         //!! or new configuration entries, but still using the dictionary.lst file)
     138             :         //!! Get the list of old style spell checking dictionaries to use...
     139             :         std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
     140          88 :                 GetOldStyleDics( "HYPH" ) );
     141             : 
     142             :         // to prefer dictionaries with configuration entries we will only
     143             :         // use those old style dictionaries that add a language that
     144             :         // is not yet supported by the list od new style dictionaries
     145          44 :         MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
     146             : 
     147          44 :         numdict = aDics.size();
     148          44 :         if (numdict)
     149             :         {
     150             :             // get supported locales from the dictionaries-to-use...
     151          44 :             sal_Int32 k = 0;
     152          44 :             std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
     153          44 :             std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
     154        1056 :             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
     155             :             {
     156        1012 :                 uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
     157        1012 :                 sal_Int32 nLen2 = aLocaleNames.getLength();
     158        2024 :                 for (k = 0;  k < nLen2;  ++k)
     159             :                 {
     160        1012 :                     aLocaleNamesSet.insert( aLocaleNames[k] );
     161             :                 }
     162        1012 :             }
     163             :             // ... and add them to the resulting sequence
     164          44 :             aSuppLocales.realloc( aLocaleNamesSet.size() );
     165          44 :             std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
     166          44 :             k = 0;
     167        1056 :             for (aItB = aLocaleNamesSet.begin();  aItB != aLocaleNamesSet.end();  ++aItB)
     168             :             {
     169        1012 :                 Locale aTmp( LanguageTag::convertToLocale( *aItB ));
     170        1012 :                 aSuppLocales[k++] = aTmp;
     171        1012 :             }
     172             : 
     173             :             //! For each dictionary and each locale we need a separate entry.
     174             :             //! If this results in more than one dictionary per locale than (for now)
     175             :             //! it is undefined which dictionary gets used.
     176             :             //! In the future the implementation should support using several dictionaries
     177             :             //! for one locale.
     178          44 :             numdict = 0;
     179        1056 :             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
     180        1012 :                 numdict = numdict + aDictIt->aLocaleNames.getLength();
     181             : 
     182             :             // add dictionary information
     183          44 :             aDicts = new HDInfo[numdict];
     184             : 
     185          44 :             k = 0;
     186        1056 :             for (aDictIt = aDics.begin();  aDictIt != aDics.end();  ++aDictIt)
     187             :             {
     188        2024 :                 if (aDictIt->aLocaleNames.getLength() > 0 &&
     189        1012 :                     aDictIt->aLocations.getLength() > 0)
     190             :                 {
     191        1012 :                     uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
     192        1012 :                     sal_Int32 nLocales = aLocaleNames.getLength();
     193             : 
     194             :                     // currently only one language per dictionary is supported in the actual implementation...
     195             :                     // Thus here we work-around this by adding the same dictionary several times.
     196             :                     // Once for each of it's supported locales.
     197        2024 :                     for (sal_Int32 i = 0;  i < nLocales;  ++i)
     198             :                     {
     199        1012 :                         LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
     200        1012 :                         aDicts[k].aPtr = NULL;
     201        1012 :                         aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
     202        1012 :                         aDicts[k].aLoc = aLanguageTag.getLocale();
     203        1012 :                         aDicts[k].apCC = new CharClass( aLanguageTag );
     204             :                         // also both files have to be in the same directory and the
     205             :                         // file names must only differ in the extension (.aff/.dic).
     206             :                         // Thus we use the first location only and strip the extension part.
     207        2024 :                         OUString aLocation = aDictIt->aLocations[0];
     208        1012 :                         sal_Int32 nPos = aLocation.lastIndexOf( '.' );
     209        1012 :                         aLocation = aLocation.copy( 0, nPos );
     210        1012 :                         aDicts[k].aName = aLocation;
     211             : 
     212        1012 :                         ++k;
     213        2024 :                     }
     214             :                 }
     215             :             }
     216          44 :             DBG_ASSERT( k == numdict, "index mismatch?" );
     217             :         }
     218             :         else
     219             :         {
     220             :             // no dictionary found so register no dictionaries
     221           0 :             numdict = 0;
     222           0 :             aDicts = NULL;
     223           0 :             aSuppLocales.realloc(0);
     224          44 :         }
     225             :     }
     226             : 
     227          88 :     return aSuppLocales;
     228             : }
     229             : 
     230       21612 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
     231             :         throw(RuntimeException, std::exception)
     232             : {
     233       21612 :     MutexGuard  aGuard( GetLinguMutex() );
     234             : 
     235       21612 :     bool bRes = false;
     236       21612 :     if (!aSuppLocales.getLength())
     237           0 :         getLocales();
     238             : 
     239       21612 :     const Locale *pLocale = aSuppLocales.getConstArray();
     240       21612 :     sal_Int32 nLen = aSuppLocales.getLength();
     241      432240 :     for (sal_Int32 i = 0;  i < nLen;  ++i)
     242             :     {
     243      432240 :         if (rLocale == pLocale[i])
     244             :         {
     245       21612 :             bRes = true;
     246       21612 :             break;
     247             :         }
     248             :     }
     249       21612 :     return bRes;
     250             : }
     251             : 
     // Hyphenates aWord using the dictionary registered for aLocale.
     //
     // The dictionary is loaded on first use. The word is normalized (smart
     // quotes/apostrophes replaced, lower-cased, converted to the dictionary's
     // text encoding, trailing periods ignored) and handed to
     // hnj_hyphen_hyphenate3(). Of all break candidates that satisfy the
     // minimum word length, the leading/trailing minima and the limit derived
     // from nMaxLeading, the last one is used.
     //
     // aWord        word to hyphenate
     // aLocale      locale selecting the dictionary
     // nMaxLeading  passed to GetPosInWordToCheck() to obtain the upper limit
     //              for the hyphenation position
     // aProperties  temporary property values applied to the property helper
     //
     // Returns the hyphenated word, or an empty reference if no dictionary
     // matches aLocale, the dictionary file cannot be loaded, its text
     // encoding is unknown, the hyphenation call fails or no suitable
     // position is found.
     252       21602 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
     253             :        const ::com::sun::star::lang::Locale& aLocale,
     254             :        sal_Int16 nMaxLeading,
     255             :        const ::com::sun::star::beans::PropertyValues& aProperties )
     256             :        throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException, std::exception)
     257             : {
                           // -1 means "no hyphenation position found"; the Alt variants are only
                           // set when the chosen position has a replacement in rep[]
     258       21602 :     int nHyphenationPos = -1;
     259       21602 :     int nHyphenationPosAlt = -1;
     260       21602 :     int nHyphenationPosAltHyph = -1;
     261             :     int wordlen;
     262       21602 :     int k = 0;
     263             : 
     264       21602 :     PropertyHelper_Hyphenation& rHelper = GetPropHelper();
     265       21602 :     rHelper.SetTmpPropVals(aProperties);
     266       21602 :     sal_Int16 minTrail = rHelper.GetMinTrailing();
     267       21602 :     sal_Int16 minLead = rHelper.GetMinLeading();
     268       21602 :     sal_Int16 minLen = rHelper.GetMinWordLength();
     269             : 
     270       21602 :     HyphenDict *dict = NULL;
     271       21602 :     rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
     272       21602 :     CharClass * pCC = NULL;
     273             : 
     274       21602 :     Reference< XHyphenatedWord > xRes;
     275             : 
                           // look up the dictionary entry for aLocale; the loop does not stop at
                           // the first match, so the last matching entry is the one used
     276       21602 :     k = -1;
     277      518448 :     for (int j = 0; j < numdict; j++)
     278             :     {
     279      496846 :         if (aLocale == aDicts[j].aLoc)
     280       21602 :             k = j;
     281             :     }
     282             : 
     283             :     // if we have a hyphenation dictionary matching this locale
     284       21602 :     if (k != -1)
     285             :     {
     286             :         // if this dictionary has not been loaded yet do that
     287       21602 :         if (!aDicts[k].aPtr)
     288             :         {
                                   // aName is the location without extension, see getLocales()
     289          10 :             OUString DictFN = aDicts[k].aName + ".dic";
     290          20 :             OUString dictpath;
     291             : 
     292          10 :             osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
     293             : 
     294             : #if defined(WNT)
     295             :             // Hyphen expects UTF-8 encoded paths with \\?\ long path prefix.
     296             :             OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8));
     297             : #else
     298          20 :             OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
     299             : #endif
     300             : 
     301          10 :             if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
     302             :             {
     303           0 :                fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
     304           0 :                return NULL;
     305             :             }
                                   // cache the loaded dictionary and the encoding named by its charset
     306          10 :             aDicts[k].aPtr = dict;
     307          20 :             aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
     308             :         }
     309             : 
     310             :         // otherwise hyphenate the word with that dictionary
     311       21602 :         dict = aDicts[k].aPtr;
     312       21602 :         eEnc = aDicts[k].eEnc;
     313       21602 :         pCC =  aDicts[k].apCC;
     314             : 
     315             :         // we don't want to work with a default text encoding since following incorrect
     316             :         // results may occur only for specific text and thus may be hard to notice.
     317             :         // Thus better always make a clean exit here if the text encoding is in question.
     318             :         // Hopefully something not working at all will raise proper attention quickly. ;-)
     319             :         DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
     320       21602 :         if (eEnc == RTL_TEXTENCODING_DONTKNOW)
     321           0 :             return NULL;
     322             : 
                               // remember the capitalization of the original word; it is applied to
                               // the replacement text further below
     323       21602 :         sal_uInt16 ct = capitalType(aWord, pCC);
     324             : 
     325             :         // first convert any smart quotes or apostrophes to normal ones
     326       21602 :         OUStringBuffer rBuf(aWord);
     327       21602 :         sal_Int32 nc = rBuf.getLength();
     328             :         sal_Unicode ch;
     329      113654 :         for (sal_Int32 ix=0; ix < nc; ix++)
     330             :         {
     331       92052 :             ch = rBuf[ix];
     332       92052 :             if ((ch == 0x201C) || (ch == 0x201D))
     333           0 :                 rBuf[ix] = (sal_Unicode)0x0022;
     334       92052 :             if ((ch == 0x2018) || (ch == 0x2019))
     335           0 :                 rBuf[ix] = (sal_Unicode)0x0027;
     336             :         }
     337       43204 :         OUString nWord(rBuf.makeStringAndClear());
     338             : 
     339             :         // now convert word to all lowercase for pattern recognition
     340       43204 :         OUString nTerm(makeLowerCase(nWord, pCC));
     341             : 
     342             :         // now convert word to needed encoding
     343       43204 :         OString encWord(OU2ENC(nTerm,eEnc));
     344             : 
                               // wordlen is the length of the encoded word as reported by OString
     345       21602 :         wordlen = encWord.getLength();
     346       43204 :         boost::scoped_array<char> lcword(new char[wordlen + 1]);
     347       43204 :         boost::scoped_array<char> hyphens(new char[wordlen + 5]);
     348             : 
                               // output arrays handed to hnj_hyphen_hyphenate3() by address; they are
                               // released with free() on every path below
     349       21602 :         char ** rep = NULL; // replacements of discretionary hyphenation
     350       21602 :         int * pos = NULL; // array of [hyphenation point] minus [deletion position]
     351       21602 :         int * cut = NULL; // length of deletions in original word
     352             : 
     353             :         // copy converted word into simple char buffer
     354       21602 :         strcpy(lcword.get(),encWord.getStr());
     355             : 
     356             :         // now strip off any ending periods
                               // (afterwards n is the length of the word without trailing periods)
     357       21602 :         int n = wordlen-1;
     358       43204 :         while((n >=0) && (lcword[n] == '.'))
     359           0 :             n--;
     360       21602 :         n++;
     361       21602 :         if (n > 0)
     362             :         {
                                   // The last two arguments combine the dictionary's clhmin/lhmin and
                                   // crhmin/rhmin values with the configured minLead/minTrail.
                                   // NOTE(review): presumably these are the compound-word left/right
                                   // minima expected by hnj_hyphen_hyphenate3() - confirm in hyphen.h
     363       21602 :             const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), NULL,
     364             :                     &rep, &pos, &cut, minLead, minTrail,
     365       21602 :                     Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead  - Max(dict->lhmin, 2))),
     366       64806 :                     Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
     367       21602 :             if (bFailed)
     368             :             {
     369             :                 // whoops something did not work
     370           0 :                 if (rep)
     371             :                 {
     372           0 :                     for(int j = 0; j < n; j++)
     373             :                     {
     374           0 :                         if (rep[j]) free(rep[j]);
     375             :                     }
     376           0 :                     free(rep);
     377             :                 }
     378           0 :                 if (pos) free(pos);
     379           0 :                 if (cut) free(cut);
     380           0 :                 return NULL;
     381             :             }
     382             :         }
     383             : 
     384             :         // now backfill hyphens[] for any removed trailing periods
     385       21602 :         for (int c = n; c < wordlen; c++) hyphens[c] = '0';
     386       21602 :         hyphens[wordlen] = '\0';
     387             : 
                               // upper limit for the hyphenation position, derived from nMaxLeading
     388       21602 :         sal_Int32 Leading =  GetPosInWordToCheck( aWord, nMaxLeading );
     389             : 
                               // check every position of the word (trailing periods excluded); a
                               // later hit overwrites an earlier one, so the last hit is kept
     390      113654 :         for (sal_Int32 i = 0; i < n; i++)
     391             :         {
     392       92052 :             int leftrep = 0;
     393       92052 :             bool hit = (n >= minLen);
                                   // NOTE(review): (i >= n) can never be true inside this loop
     394       92052 :             if (!rep || !rep[i] || (i >= n))
     395             :             {
                                       // no replacement at this position: the lowest bit of hyphens[i]
                                       // marks a candidate, which must also respect the limits
     396       92052 :                 hit = hit && (hyphens[i]&1) && (i < Leading);
     397       92052 :                 hit = hit && (i >= (minLead-1) );
     398       92052 :                 hit = hit && ((n - i - 1) >= minTrail);
     399             :             }
     400             :             else
     401             :             {
     402             :                 // calculate change character length before hyphenation point signed with '='
                                       // (for UTF-8 only bytes whose top two bits are not 10 are counted,
                                       // i.e. continuation bytes are skipped)
     403           0 :                 for (char * c = rep[i]; *c && (*c != '='); c++)
     404             :                 {
     405           0 :                     if (eEnc == RTL_TEXTENCODING_UTF8)
     406             :                     {
     407           0 :                         if (((unsigned char) *c) >> 6 != 2)
     408           0 :                             leftrep++;
     409             :                     }
     410             :                     else
     411           0 :                         leftrep++;
     412             :                 }
                                       // same three checks as above, applied to the position shifted by
                                       // the replacement (leftrep, pos[i]) and the replacement's length
     413           0 :                 hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
     414           0 :                 hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
     415           0 :                 hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
     416             :             }
     417       92052 :             if (hit)
     418             :             {
     419         593 :                 nHyphenationPos = i;
     420         593 :                 if (rep && (i < n) && rep[i])
     421             :                 {
     422           0 :                     nHyphenationPosAlt = i - pos[i];
     423           0 :                     nHyphenationPosAltHyph = i + leftrep - pos[i];
     424             :                 }
     425             :             }
     426             :         }
     427             : 
     428       21602 :         if (nHyphenationPos  == -1)
     429             :         {
     430       21009 :             xRes = NULL;
     431             :         }
     432             :         else
     433             :         {
     434         593 :             if (rep && rep[nHyphenationPos])
     435             :             {
     436             :                 // remove equal sign
                                       // (from the '=' on, each character is overwritten by its successor,
                                       // which drops the '=' and moves the terminating NUL one to the left)
     437           0 :                 char * s = rep[nHyphenationPos];
     438           0 :                 int eq = 0;
     439           0 :                 for (; *s; s++)
     440             :                 {
     441           0 :                     if (*s == '=') eq = 1;
     442           0 :                     if (eq) *s = *(s + 1);
     443             :                 }
     444           0 :                 OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
     445           0 :                 OUString repHyph;
                                       // give the lower-case replacement the capitalization of the word
     446           0 :                 switch (ct)
     447             :                 {
     448             :                     case CAPTYPE_ALLCAP:
     449             :                     {
     450           0 :                         repHyph = makeUpperCase(repHyphlow, pCC);
     451           0 :                         break;
     452             :                     }
     453             :                     case CAPTYPE_INITCAP:
     454             :                     {
                                               // -1 means the replacement is inserted at index 0 of the
                                               // word (see replaceAt below), so it carries the initial
     455           0 :                         if (nHyphenationPosAlt == -1)
     456           0 :                             repHyph = makeInitCap(repHyphlow, pCC);
     457             :                         else
     458           0 :                              repHyph = repHyphlow;
     459           0 :                         break;
     460             :                     }
     461             :                     default:
     462             :                     {
     463           0 :                         repHyph = repHyphlow;
     464           0 :                         break;
     465             :                     }
     466             :                 }
     467             : 
     468             :                 // handle shortening
                                       // (use the smaller of the two positions as hyphenation position)
     469             :                 sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
     470           0 :                 nHyphenationPosAltHyph : nHyphenationPos);
     471             :                 // discretionary hyphenation
     472           0 :                 xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
     473           0 :                     aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
     474           0 :                     (sal_Int16) nHyphenationPosAltHyph);
     475             :             }
     476             :             else
     477             :             {
                                       // plain hyphenation: the hyphenated word equals the original word
     478        1186 :                 xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
     479         593 :                     (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
     480             :             }
     481             :         }
     482             : 
                               // release the arrays filled in by hnj_hyphen_hyphenate3()
     483       21602 :         if (rep)
     484             :         {
     485           0 :             for(int j = 0; j < n; j++)
     486             :             {
     487           0 :                 if (rep[j]) free(rep[j]);
     488             :             }
     489           0 :             free(rep);
     490             :         }
     491       21602 :         if (pos) free(pos);
     492       21602 :         if (cut) free(cut);
     493       43204 :         return xRes;
     494             :     }
                           // no dictionary for aLocale
     495           0 :     return NULL;
     496             : }
     497             : 
     498           0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
     499             :         const OUString& aWord,
     500             :         const ::com::sun::star::lang::Locale& aLocale,
     501             :         sal_Int16 nIndex,
     502             :         const ::com::sun::star::beans::PropertyValues& aProperties )
     503             :         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
     504             : {
     505             :     // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
     506           0 :     for (int extrachar = 1; extrachar <= 2; extrachar++)
     507             :     {
     508           0 :         Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
     509           0 :         if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
     510           0 :             return xRes;
     511           0 :     }
     512           0 :     return NULL;
     513             : }
     514             : 
     515             : #if defined(WNT)
     516             : static OString Win_GetShortPathName( const OUString &rLongPathName )
     517             : {
     518             :     OString aRes;
     519             : 
     520             :     sal_Unicode aShortBuffer[1024] = {0};
     521             :     sal_Int32   nShortBufSize = SAL_N_ELEMENTS( aShortBuffer );
     522             : 
     523             :     // use the version of 'GetShortPathName' that can deal with Unicode...
     524             :     sal_Int32 nShortLen = GetShortPathNameW(
     525             :             reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
     526             :             reinterpret_cast<LPWSTR>( aShortBuffer ),
     527             :             nShortBufSize );
     528             : 
     529             :     if (nShortLen < nShortBufSize) // conversion successful?
     530             :         aRes = OString( OU2ENC( OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
     531             :     else
     532             :         OSL_FAIL( "Win_GetShortPathName: buffer to short" );
     533             : 
     534             :     return aRes;
     535             : }
     536             : #endif //defined(WNT)
     537             : // Build an XPossibleHyphens for aWord using the hyphenation dictionary registered for aLocale; returns NULL when no result can be produced.
     538           0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
     539             :         const ::com::sun::star::lang::Locale& aLocale,
     540             :         const ::com::sun::star::beans::PropertyValues& aProperties )
     541             :         throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
     542             : {
     543           0 :     PropertyHelper_Hyphenation& rHelper = GetPropHelper();
     544           0 :     rHelper.SetTmpPropVals(aProperties); // aProperties supply the temporary values read just below
     545           0 :     sal_Int16 minTrail = rHelper.GetMinTrailing();
     546           0 :     sal_Int16 minLead = rHelper.GetMinLeading();
     547           0 :     sal_Int16 minLen = rHelper.GetMinWordLength();
     548             : 
     549             :     // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
     550             :     // well as "hyphenate"
     551           0 :     if (aWord.getLength() < minLen)
     552             :     {
     553           0 :         return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ),
     554           0 :                       aWord, Sequence< sal_Int16 >() ); // too short: word returned unchanged with an empty position list
     555             :     }
     556             :     // find the dictionary entry for aLocale; the loop does not stop early, so the last match wins
     557           0 :     int k = -1;
     558           0 :     for (int j = 0; j < numdict; j++)
     559             :     {
     560           0 :         if (aLocale == aDicts[j].aLoc) k = j;
     561             :     }
     562             : 
     563             :     // if we have a hyphenation dictionary matching this locale
     564           0 :     if (k != -1)
     565             :     {
     566           0 :         HyphenDict *dict = NULL;
     567             :         // if this dictionary has not been loaded yet, do that now
     568           0 :         if (!aDicts[k].aPtr)
     569             :         {
     570           0 :             OUString DictFN = aDicts[k].aName + ".dic";
     571           0 :             OUString dictpath;
     572             : 
     573           0 :             osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
     574           0 :             OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
     575             : 
     576             : #if defined(WNT)
     577             :             // workaround for a Windows specific problem: the
     578             :             // path length in calls to 'fopen' is limited to roughly
     579             :             // 120+ characters, which will usually be exceeded when
     580             :             // using dictionaries as extensions.
     581             :             sTmp = Win_GetShortPathName( dictpath );
     582             : #endif
     583             : 
     584           0 :             if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
     585             :             {
     586           0 :                fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
     587           0 :                return NULL;
     588             :             }
     589           0 :             aDicts[k].aPtr = dict; // cache the loaded dictionary and its text encoding for later calls
     590           0 :             aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
     591             :         }
     592             : 
     593             :         // now hyphenate the word with that dictionary
     594           0 :         dict = aDicts[k].aPtr;
     595           0 :         rtl_TextEncoding eEnc = aDicts[k].eEnc;
     596           0 :         CharClass* pCC = aDicts[k].apCC;
     597             : 
     598             :         // we don't want to work with a default text encoding since following incorrect
     599             :         // results may occur only for specific text and thus may be hard to notice.
     600             :         // Thus better always make a clean exit here if the text encoding is in question.
     601             :         // Hopefully something not working at all will raise proper attention quickly. ;-)
     602             :         DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
     603           0 :         if (eEnc == RTL_TEXTENCODING_DONTKNOW)
     604           0 :             return NULL;
     605             : 
     606             :         // first handle smart quotes both single and double
     607           0 :         OUStringBuffer rBuf(aWord);
     608           0 :         sal_Int32 nc = rBuf.getLength();
     609             :         sal_Unicode ch;
     610           0 :         for (sal_Int32 ix=0; ix < nc; ix++)
     611             :         {
     612           0 :             ch = rBuf[ix];
     613           0 :             if ((ch == 0x201C) || (ch == 0x201D)) // curly double quotes -> '"'
     614           0 :                 rBuf[ix] = (sal_Unicode)0x0022;
     615           0 :             if ((ch == 0x2018) || (ch == 0x2019)) // curly single quotes -> '\''
     616           0 :                 rBuf[ix] = (sal_Unicode)0x0027;
     617             :         }
     618           0 :         OUString nWord(rBuf.makeStringAndClear());
     619             : 
     620             :         // now convert word to all lowercase for pattern recognition
     621           0 :         OUString nTerm(makeLowerCase(nWord, pCC));
     622             : 
     623             :         // now convert word to needed encoding
     624           0 :         OString encWord(OU2ENC(nTerm,eEnc));
     625             : 
     626           0 :         int wordlen = encWord.getLength();
     627           0 :         boost::scoped_array<char> lcword(new char[wordlen+1]);
     628           0 :         boost::scoped_array<char> hyphens(new char[wordlen+5]); // NOTE(review): the +5 presumably matches the hyphenation library's buffer requirement - confirm
     629           0 :         char ** rep = NULL; // replacements of discretionary hyphenation
     630           0 :         int * pos = NULL; // array of [hyphenation point] minus [deletion position]
     631           0 :         int * cut = NULL; // length of deletions in original word
     632             : 
     633             :         // copy converted word into simple char buffer
     634           0 :         strcpy(lcword.get(),encWord.getStr());
     635             : 
     636             :         // first remove any trailing periods
     637           0 :         int n = wordlen-1;
     638           0 :         while((n >=0) && (lcword[n] == '.'))
     639           0 :             n--;
     640           0 :         n++; // n is now the length of the word without its trailing periods
     641           0 :         if (n > 0)
     642             :         {
     643           0 :             const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), NULL,
     644             :                     &rep, &pos, &cut, minLead, minTrail,
     645           0 :                     Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
     646           0 :                     Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
     647           0 :             if (bFailed)
     648             :             {
     649           0 :                 if (rep) // free whatever was handed back through rep/pos/cut before giving up
     650             :                 {
     651           0 :                     for(int j = 0; j < n; j++)
     652             :                     {
     653           0 :                         if (rep[j]) free(rep[j]);
     654             :                     }
     655           0 :                     free(rep);
     656             :                 }
     657           0 :                 if (pos) free(pos);
     658           0 :                 if (cut) free(cut);
     659             : 
     660           0 :                 return NULL;
     661             :             }
     662             :         }
     663             :         // now backfill hyphens[] for any removed periods
     664           0 :         for (int c = n; c < wordlen; c++)
     665           0 :             hyphens[c] = '0';
     666           0 :         hyphens[wordlen] = '\0';
     667             :         // count the hyphenation points: entries of hyphens[] with the lowest bit set
     668           0 :         sal_Int16 nHyphCount = 0;
     669             :         sal_Int16 i;
     670             : 
     671           0 :         for ( i = 0; i < encWord.getLength(); i++)
     672             :         {
     673           0 :             if (hyphens[i]&1)
     674           0 :                 nHyphCount++;
     675             :         }
     676             :         // NOTE(review): the count above runs over the encoded length, the fill loop below over nWord's UTF-16 length - confirm both agree for multi-byte encodings
     677           0 :         Sequence< sal_Int16 > aHyphPos(nHyphCount);
     678           0 :         sal_Int16 *pPos = aHyphPos.getArray();
     679           0 :         OUStringBuffer hyphenatedWordBuffer;
     680           0 :         nHyphCount = 0;
     681             :         // rebuild the word with '=' after every hyphenation point and record the positions
     682           0 :         for (i = 0; i < nWord.getLength(); i++)
     683             :         {
     684           0 :             hyphenatedWordBuffer.append(aWord[i]); // characters come from the caller's aWord, not the normalised copy
     685             :             // hyphenation position
     686           0 :             if (hyphens[i]&1)
     687             :             {
     688           0 :                 pPos[nHyphCount] = i;
     689           0 :                 hyphenatedWordBuffer.append('=');
     690           0 :                 nHyphCount++;
     691             :             }
     692             :         }
     693             : 
     694           0 :         OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
     695             : 
     696             :         Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
     697           0 :             aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
     698             :         // release the arrays handed back through rep/pos/cut
     699           0 :         if (rep)
     700             :         {
     701           0 :             for(int j = 0; j < n; j++)
     702             :             {
     703           0 :                 if (rep[j]) free(rep[j]);
     704             :             }
     705           0 :             free(rep);
     706             :         }
     707           0 :         if (pos) free(pos);
     708           0 :         if (cut) free(cut);
     709             : 
     710           0 :         return xRes;
     711             :     }
     712             :     // no dictionary is registered for aLocale
     713           0 :     return NULL;
     714             : }
     715             : // Return aTerm lower-cased by pCC, or aTerm unchanged when pCC is null.
     716       21602 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
     717             : {
     718       21602 :     if (pCC)
     719       21602 :         return pCC->lowercase(aTerm);
     720           0 :     return aTerm;
     721             : }
     722             : // Return aTerm upper-cased by pCC, or aTerm unchanged when pCC is null.
     723           0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
     724             : {
     725           0 :     if (pCC)
     726           0 :         return pCC->uppercase(aTerm);
     727           0 :     return aTerm;
     728             : }
     729             : // Return aTerm with its first character upper-cased and the rest lower-cased; aTerm is returned unchanged when pCC is null or aTerm is empty.
     730           0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
     731             : {
     732           0 :     sal_Int32 tlen = aTerm.getLength();
     733           0 :     if ((pCC) && (tlen))
     734             :     {
     735           0 :         OUString bTemp = aTerm.copy(0,1); // the first UTF-16 code unit of the term
     736           0 :         if (tlen > 1)
     737           0 :             return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
     738             :         // single-character term: only the upper-cased first character is returned
     739           0 :         return pCC->uppercase(bTemp, 0, 1);
     740             :     }
     741           0 :     return aTerm;
     742             : }
     743             : // Create a new Hyphenator and return it as an XInterface reference; the service manager argument is not used.
     744          44 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
     745             :         const Reference< XMultiServiceFactory > & /*rSMgr*/ )
     746             :         throw(Exception)
     747             : {
     748          44 :     Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
     749          44 :     return xService;
     750             : }
     751             : // Forward rxLstnr to the property helper's addLinguServiceEventListener and return its result; returns false when disposing or when rxLstnr is empty.
     752          10 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
     753             :         const Reference< XLinguServiceEventListener >& rxLstnr )
     754             :         throw(RuntimeException, std::exception)
     755             : {
     756          10 :     MutexGuard  aGuard( GetLinguMutex() );
     757             : 
     758          10 :     bool bRes = false;
     759          10 :     if (!bDisposing && rxLstnr.is())
     760             :     {
     761          10 :         bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
     762             :     }
     763          10 :     return bRes;
     764             : }
     765             : // Forward rxLstnr to the property helper's removeLinguServiceEventListener and return its result; returns false when disposing or when rxLstnr is empty.
     766          10 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
     767             :         const Reference< XLinguServiceEventListener >& rxLstnr )
     768             :         throw(RuntimeException, std::exception)
     769             : {
     770          10 :     MutexGuard  aGuard( GetLinguMutex() );
     771             : 
     772          10 :     bool bRes = false;
     773          10 :     if (!bDisposing && rxLstnr.is())
     774             :     {
     775           0 :         bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
     776             :     }
     777          10 :     return bRes;
     778             : }
     779             : // Return the fixed display name of this service; the locale argument is ignored.
     780           0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
     781             :         throw(RuntimeException, std::exception)
     782             : {
     783           0 :     MutexGuard  aGuard( GetLinguMutex() );
     784           0 :     return OUString( "Libhyphen Hyphenator" );
     785             : }
     786             : // Create the property helper from rArguments; does nothing when a helper already exists, and only asserts when the argument count is not 2.
     787           0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
     788             :         throw(Exception, RuntimeException, std::exception)
     789             : {
     790           0 :     MutexGuard  aGuard( GetLinguMutex() );
     791             : 
     792           0 :     if (!pPropHelper)
     793             :     {
     794           0 :         sal_Int32 nLen = rArguments.getLength();
     795           0 :         if (2 == nLen)
     796             :         {
     797           0 :             Reference< XLinguProperties >   xPropSet;
     798           0 :             rArguments.getConstArray()[0] >>= xPropSet; // only the first argument is read; the second is left untouched
     799             :             // rArguments.getConstArray()[1] >>= xDicList;
     800             : 
     801             :             //! Pointer allows for access of the non-UNO functions.
     802             :             //! And the reference to the UNO-functions while increasing
     803             :             //! the ref-count and will implicitly free the memory
     804             :             //! when the object is no longer used.
     805           0 :             pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
     806           0 :             pPropHelper->AddAsPropListener();   //! after a reference is established
     807             :         }
     808             :         else {
     809             :             OSL_FAIL( "wrong number of arguments in sequence" );
     810             :         }
     811           0 :     }
     812           0 : }
     813             : // Notify and clear the event listeners and destroy the property helper; calls after the first one do nothing.
     814          44 : void SAL_CALL Hyphenator::dispose()
     815             :         throw(RuntimeException, std::exception)
     816             : {
     817          44 :     MutexGuard  aGuard( GetLinguMutex() );
     818             :     // bDisposing is set on the first call, so the teardown below runs only once
     819          44 :     if (!bDisposing)
     820             :     {
     821          44 :         bDisposing = true;
     822          44 :         EventObject aEvtObj( (XHyphenator *) this );
     823          44 :         aEvtListeners.disposeAndClear( aEvtObj );
     824          44 :         if (pPropHelper)
     825             :         {
     826          10 :             pPropHelper->RemoveAsPropListener(); // unregister the helper before deleting it
     827          10 :             delete pPropHelper;
     828          10 :             pPropHelper = NULL;
     829          44 :         }
     830          44 :     }
     831          44 : }
     832             : // Add rxListener to aEvtListeners; ignored when disposing or when rxListener is empty.
     833           0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
     834             :         throw(RuntimeException, std::exception)
     835             : {
     836           0 :     MutexGuard  aGuard( GetLinguMutex() );
     837             : 
     838           0 :     if (!bDisposing && rxListener.is())
     839           0 :         aEvtListeners.addInterface( rxListener );
     840           0 : }
     841             : // Remove rxListener from aEvtListeners; ignored when disposing or when rxListener is empty.
     842           0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
     843             :         throw(RuntimeException, std::exception)
     844             : {
     845           0 :     MutexGuard  aGuard( GetLinguMutex() );
     846             : 
     847           0 :     if (!bDisposing && rxListener.is())
     848           0 :         aEvtListeners.removeInterface( rxListener );
     849           0 : }
     850             : 
     851             : // Service specific part
     852          44 : OUString SAL_CALL Hyphenator::getImplementationName()
     853             :         throw(RuntimeException, std::exception)
     854             : {
     855          44 :     MutexGuard  aGuard( GetLinguMutex() );
     856             :     // instance accessor: returns whatever getImplementationName_Static() returns
     857          44 :     return getImplementationName_Static();
     858             : }
     859             : // Delegate the check for ServiceName to cppu::supportsService and return its result.
     860           0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
     861             :         throw(RuntimeException, std::exception)
     862             : {
     863           0 :     return cppu::supportsService(this, ServiceName);
     864             : }
     865             : // Instance accessor: returns whatever getSupportedServiceNames_Static() returns.
     866           0 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
     867             :         throw(RuntimeException, std::exception)
     868             : {
     869           0 :     MutexGuard  aGuard( GetLinguMutex() );
     870             : 
     871           0 :     return getSupportedServiceNames_Static();
     872             : }
     873             : // Return a one-element sequence holding SN_HYPHENATOR.
     874          44 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
     875             :         throw()
     876             : {
     877          44 :     MutexGuard  aGuard( GetLinguMutex() );
     878             : 
     879          44 :     Sequence< OUString > aSNS( 1 ); // more than 1 service is possible, too
     880          44 :     aSNS.getArray()[0] = SN_HYPHENATOR;
     881          44 :     return aSNS;
     882             : }
     883             : // Return an acquired one-instance factory for the Hyphenator when pImplName equals its implementation name, otherwise 0; the third argument is unused.
     884          44 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
     885             :             XMultiServiceFactory * pServiceManager, void *  )
     886             : {
     887          44 :     void * pRet = 0;
     888          44 :     if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) )
     889             :     {
     890             :         Reference< XSingleServiceFactory > xFactory =
     891             :             cppu::createOneInstanceFactory(
     892             :                 pServiceManager,
     893             :                 Hyphenator::getImplementationName_Static(),
     894             :                 Hyphenator_CreateInstance,
     895          44 :                 Hyphenator::getSupportedServiceNames_Static());
     896             :         // acquire, because we return an interface pointer instead of a reference
     897          44 :         xFactory->acquire();
     898          44 :         pRet = xFactory.get();
     899             :     }
     900          44 :     return pRet;
     901             : }
     902             : 
     903             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10