Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #if defined(WNT)
21 : #include <prewin.h>
22 : #include <postwin.h>
23 : #endif
24 :
25 : #include <com/sun/star/uno/Reference.h>
26 : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
27 :
28 : #include <cppuhelper/factory.hxx>
29 : #include <cppuhelper/supportsservice.hxx>
30 : #include <com/sun/star/registry/XRegistryKey.hpp>
31 : #include <i18nlangtag/languagetag.hxx>
32 : #include <tools/debug.hxx>
33 : #include <osl/mutex.hxx>
34 :
35 : #include <hyphen.h>
36 : #include <hyphenimp.hxx>
37 :
38 : #include <linguistic/hyphdta.hxx>
39 : #include <rtl/ustring.hxx>
40 : #include <rtl/ustrbuf.hxx>
41 : #include <rtl/textenc.h>
42 :
43 : #include <linguistic/lngprops.hxx>
44 : #include <linguistic/misc.hxx>
45 : #include <unotools/pathoptions.hxx>
46 : #include <unotools/useroptions.hxx>
47 : #include <unotools/lingucfg.hxx>
48 : #include <osl/file.hxx>
49 :
50 : #include <stdio.h>
51 : #include <string.h>
52 :
53 : #include <list>
54 : #include <set>
55 : #include <boost/scoped_array.hpp>
56 :
57 : using namespace utl;
58 : using namespace osl;
59 : using namespace com::sun::star;
60 : using namespace com::sun::star::beans;
61 : using namespace com::sun::star::lang;
62 : using namespace com::sun::star::uno;
63 : using namespace com::sun::star::linguistic2;
64 : using namespace linguistic;
65 :
// Max(a,b): yields the larger of its two arguments.
// Every use of an argument is parenthesized so that operands containing
// low-precedence operators (e.g. '&', '|', '?:') expand as intended.
// NOTE: the selected argument is evaluated twice - do not pass expressions
// with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
68 :
69 43 : Hyphenator::Hyphenator() :
70 43 : aEvtListeners ( GetLinguMutex() )
71 : {
72 43 : bDisposing = false;
73 43 : pPropHelper = NULL;
74 43 : aDicts = NULL;
75 43 : numdict = 0;
76 43 : }
77 :
78 126 : Hyphenator::~Hyphenator()
79 : {
80 42 : if (numdict && aDicts)
81 : {
82 84 : for (int i=0; i < numdict; ++i)
83 : {
84 42 : delete aDicts[i].apCC;
85 42 : if (aDicts[i].aPtr)
86 4 : hnj_hyphen_free(aDicts[i].aPtr);
87 : }
88 : }
89 42 : delete[] aDicts;
90 :
91 42 : if (pPropHelper)
92 : {
93 0 : pPropHelper->RemoveAsPropListener();
94 0 : delete pPropHelper;
95 : }
96 84 : }
97 :
98 5 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
99 : {
100 5 : if (!pPropHelper)
101 : {
102 5 : Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
103 :
104 5 : pPropHelper = new PropertyHelper_Hyphenation (static_cast<XHyphenator *>(this), xPropSet );
105 5 : pPropHelper->AddAsPropListener(); //! after a reference is established
106 : }
107 5 : return *pPropHelper;
108 : }
109 :
110 86 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
111 : throw(RuntimeException, std::exception)
112 : {
113 86 : MutexGuard aGuard( GetLinguMutex() );
114 :
115 : // this routine should return the locales supported by the installed
116 : // dictionaries.
117 86 : if (!numdict)
118 : {
119 43 : SvtLinguConfig aLinguCfg;
120 :
121 : // get list of dictionaries-to-use
122 : // (or better speaking: the list of dictionaries using the
123 : // new configuration entries).
124 86 : std::list< SvtLinguConfigDictionaryEntry > aDics;
125 86 : uno::Sequence< OUString > aFormatList;
126 : aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators",
127 43 : "org.openoffice.lingu.LibHnjHyphenator", aFormatList );
128 43 : sal_Int32 nLen = aFormatList.getLength();
129 86 : for (sal_Int32 i = 0; i < nLen; ++i)
130 : {
131 : std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
132 43 : aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
133 43 : aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
134 43 : }
135 :
136 : //!! for compatibility with old dictionaries (the ones not using extensions
137 : //!! or new configuration entries, but still using the dictionary.lst file)
138 : //!! Get the list of old style spell checking dictionaries to use...
139 : std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
140 86 : GetOldStyleDics( "HYPH" ) );
141 :
142 : // to prefer dictionaries with configuration entries we will only
143 : // use those old style dictionaries that add a language that
144 : // is not yet supported by the list od new style dictionaries
145 43 : MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
146 :
147 43 : numdict = aDics.size();
148 43 : if (numdict)
149 : {
150 : // get supported locales from the dictionaries-to-use...
151 43 : sal_Int32 k = 0;
152 43 : std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
153 43 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
154 86 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
155 : {
156 43 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
157 43 : sal_Int32 nLen2 = aLocaleNames.getLength();
158 86 : for (k = 0; k < nLen2; ++k)
159 : {
160 43 : aLocaleNamesSet.insert( aLocaleNames[k] );
161 : }
162 43 : }
163 : // ... and add them to the resulting sequence
164 43 : aSuppLocales.realloc( aLocaleNamesSet.size() );
165 43 : std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
166 43 : k = 0;
167 86 : for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
168 : {
169 43 : Locale aTmp( LanguageTag::convertToLocale( *aItB ));
170 43 : aSuppLocales[k++] = aTmp;
171 43 : }
172 :
173 : //! For each dictionary and each locale we need a separate entry.
174 : //! If this results in more than one dictionary per locale than (for now)
175 : //! it is undefined which dictionary gets used.
176 : //! In the future the implementation should support using several dictionaries
177 : //! for one locale.
178 43 : numdict = 0;
179 86 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
180 43 : numdict = numdict + aDictIt->aLocaleNames.getLength();
181 :
182 : // add dictionary information
183 43 : aDicts = new HDInfo[numdict];
184 :
185 43 : k = 0;
186 86 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
187 : {
188 86 : if (aDictIt->aLocaleNames.getLength() > 0 &&
189 43 : aDictIt->aLocations.getLength() > 0)
190 : {
191 43 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
192 43 : sal_Int32 nLocales = aLocaleNames.getLength();
193 :
194 : // currently only one language per dictionary is supported in the actual implementation...
195 : // Thus here we work-around this by adding the same dictionary several times.
196 : // Once for each of it's supported locales.
197 86 : for (sal_Int32 i = 0; i < nLocales; ++i)
198 : {
199 43 : LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
200 43 : aDicts[k].aPtr = NULL;
201 43 : aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
202 43 : aDicts[k].aLoc = aLanguageTag.getLocale();
203 43 : aDicts[k].apCC = new CharClass( aLanguageTag );
204 : // also both files have to be in the same directory and the
205 : // file names must only differ in the extension (.aff/.dic).
206 : // Thus we use the first location only and strip the extension part.
207 86 : OUString aLocation = aDictIt->aLocations[0];
208 43 : sal_Int32 nPos = aLocation.lastIndexOf( '.' );
209 43 : aLocation = aLocation.copy( 0, nPos );
210 43 : aDicts[k].aName = aLocation;
211 :
212 43 : ++k;
213 86 : }
214 : }
215 : }
216 43 : DBG_ASSERT( k == numdict, "index mismatch?" );
217 : }
218 : else
219 : {
220 : // no dictionary found so register no dictionaries
221 0 : numdict = 0;
222 0 : aDicts = NULL;
223 0 : aSuppLocales.realloc(0);
224 43 : }
225 : }
226 :
227 86 : return aSuppLocales;
228 : }
229 :
230 5240 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
231 : throw(RuntimeException, std::exception)
232 : {
233 5240 : MutexGuard aGuard( GetLinguMutex() );
234 :
235 5240 : bool bRes = false;
236 5240 : if (!aSuppLocales.getLength())
237 0 : getLocales();
238 :
239 5240 : const Locale *pLocale = aSuppLocales.getConstArray();
240 5240 : sal_Int32 nLen = aSuppLocales.getLength();
241 5240 : for (sal_Int32 i = 0; i < nLen; ++i)
242 : {
243 5240 : if (rLocale == pLocale[i])
244 : {
245 5240 : bRes = true;
246 5240 : break;
247 : }
248 : }
249 5240 : return bRes;
250 : }
251 :
252 5235 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
253 : const ::com::sun::star::lang::Locale& aLocale,
254 : sal_Int16 nMaxLeading,
255 : const ::com::sun::star::beans::PropertyValues& aProperties )
256 : throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException, std::exception)
257 : {
258 5235 : int k = 0;
259 :
260 5235 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
261 5235 : rHelper.SetTmpPropVals(aProperties);
262 5235 : sal_Int16 minTrail = rHelper.GetMinTrailing();
263 5235 : sal_Int16 minLead = rHelper.GetMinLeading();
264 5235 : sal_Int16 minLen = rHelper.GetMinWordLength();
265 :
266 5235 : HyphenDict *dict = NULL;
267 5235 : rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
268 :
269 5235 : Reference< XHyphenatedWord > xRes;
270 :
271 5235 : k = -1;
272 10470 : for (int j = 0; j < numdict; j++)
273 : {
274 5235 : if (aLocale == aDicts[j].aLoc)
275 5235 : k = j;
276 : }
277 :
278 : // if we have a hyphenation dictionary matching this locale
279 5235 : if (k != -1)
280 : {
281 5235 : int nHyphenationPos = -1;
282 5235 : int nHyphenationPosAlt = -1;
283 5235 : int nHyphenationPosAltHyph = -1;
284 :
285 : // if this dictinary has not been loaded yet do that
286 5235 : if (!aDicts[k].aPtr)
287 : {
288 5 : OUString DictFN = aDicts[k].aName + ".dic";
289 10 : OUString dictpath;
290 :
291 5 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
292 :
293 : #if defined(WNT)
294 : // Hyphen waits UTF-8 encoded paths with \\?\ long path prefix.
295 : OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8));
296 : #else
297 10 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
298 : #endif
299 :
300 5 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
301 : {
302 0 : fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
303 0 : return NULL;
304 : }
305 5 : aDicts[k].aPtr = dict;
306 10 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
307 : }
308 :
309 : // other wise hyphenate the word with that dictionary
310 5235 : dict = aDicts[k].aPtr;
311 5235 : eEnc = aDicts[k].eEnc;
312 5235 : CharClass * pCC = aDicts[k].apCC;
313 :
314 : // we don't want to work with a default text encoding since following incorrect
315 : // results may occur only for specific text and thus may be hard to notice.
316 : // Thus better always make a clean exit here if the text encoding is in question.
317 : // Hopefully something not working at all will raise proper attention quickly. ;-)
318 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
319 5235 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
320 0 : return NULL;
321 :
322 5235 : CapType ct = capitalType(aWord, pCC);
323 :
324 : // first convert any smart quotes or apostrophes to normal ones
325 5235 : OUStringBuffer rBuf(aWord);
326 5235 : sal_Int32 nc = rBuf.getLength();
327 : sal_Unicode ch;
328 38986 : for (sal_Int32 ix=0; ix < nc; ix++)
329 : {
330 33751 : ch = rBuf[ix];
331 33751 : if ((ch == 0x201C) || (ch == 0x201D))
332 0 : rBuf[ix] = (sal_Unicode)0x0022;
333 33751 : if ((ch == 0x2018) || (ch == 0x2019))
334 0 : rBuf[ix] = (sal_Unicode)0x0027;
335 : }
336 10470 : OUString nWord(rBuf.makeStringAndClear());
337 :
338 : // now convert word to all lowercase for pattern recognition
339 10470 : OUString nTerm(makeLowerCase(nWord, pCC));
340 :
341 : // now convert word to needed encoding
342 10470 : OString encWord(OU2ENC(nTerm,eEnc));
343 :
344 5235 : int wordlen = encWord.getLength();
345 10470 : boost::scoped_array<char> lcword(new char[wordlen + 1]);
346 10470 : boost::scoped_array<char> hyphens(new char[wordlen + 5]);
347 :
348 5235 : char ** rep = NULL; // replacements of discretionary hyphenation
349 5235 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
350 5235 : int * cut = NULL; // length of deletions in original word
351 :
352 : // copy converted word into simple char buffer
353 5235 : strcpy(lcword.get(),encWord.getStr());
354 :
355 : // now strip off any ending periods
356 5235 : int n = wordlen-1;
357 10470 : while((n >=0) && (lcword[n] == '.'))
358 0 : n--;
359 5235 : n++;
360 5235 : if (n > 0)
361 : {
362 5235 : const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), NULL,
363 : &rep, &pos, &cut, minLead, minTrail,
364 5235 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
365 15705 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
366 5235 : if (bFailed)
367 : {
368 : // whoops something did not work
369 0 : if (rep)
370 : {
371 0 : for(int j = 0; j < n; j++)
372 : {
373 0 : if (rep[j]) free(rep[j]);
374 : }
375 0 : free(rep);
376 : }
377 0 : if (pos) free(pos);
378 0 : if (cut) free(cut);
379 0 : return NULL;
380 : }
381 : }
382 :
383 : // now backfill hyphens[] for any removed trailing periods
384 5235 : for (int c = n; c < wordlen; c++) hyphens[c] = '0';
385 5235 : hyphens[wordlen] = '\0';
386 :
387 5235 : sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
388 :
389 38986 : for (sal_Int32 i = 0; i < n; i++)
390 : {
391 33751 : int leftrep = 0;
392 33751 : bool hit = (n >= minLen);
393 33751 : if (!rep || !rep[i] || (i >= n))
394 : {
395 33751 : hit = hit && (hyphens[i]&1) && (i < Leading);
396 33751 : hit = hit && (i >= (minLead-1) );
397 33751 : hit = hit && ((n - i - 1) >= minTrail);
398 : }
399 : else
400 : {
401 : // calculate change character length before hyphenation point signed with '='
402 0 : for (char * c = rep[i]; *c && (*c != '='); c++)
403 : {
404 0 : if (eEnc == RTL_TEXTENCODING_UTF8)
405 : {
406 0 : if (((unsigned char) *c) >> 6 != 2)
407 0 : leftrep++;
408 : }
409 : else
410 0 : leftrep++;
411 : }
412 0 : hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
413 0 : hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
414 0 : hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
415 : }
416 33751 : if (hit)
417 : {
418 8 : nHyphenationPos = i;
419 8 : if (rep && (i < n) && rep[i])
420 : {
421 0 : nHyphenationPosAlt = i - pos[i];
422 0 : nHyphenationPosAltHyph = i + leftrep - pos[i];
423 : }
424 : }
425 : }
426 :
427 5235 : if (nHyphenationPos == -1)
428 : {
429 5227 : xRes = NULL;
430 : }
431 : else
432 : {
433 8 : if (rep && rep[nHyphenationPos])
434 : {
435 : // remove equal sign
436 0 : char * s = rep[nHyphenationPos];
437 0 : int eq = 0;
438 0 : for (; *s; s++)
439 : {
440 0 : if (*s == '=') eq = 1;
441 0 : if (eq) *s = *(s + 1);
442 : }
443 0 : OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
444 0 : OUString repHyph;
445 0 : switch (ct)
446 : {
447 : case CapType::ALLCAP:
448 : {
449 0 : repHyph = makeUpperCase(repHyphlow, pCC);
450 0 : break;
451 : }
452 : case CapType::INITCAP:
453 : {
454 0 : if (nHyphenationPosAlt == -1)
455 0 : repHyph = makeInitCap(repHyphlow, pCC);
456 : else
457 0 : repHyph = repHyphlow;
458 0 : break;
459 : }
460 : default:
461 : {
462 0 : repHyph = repHyphlow;
463 0 : break;
464 : }
465 : }
466 :
467 : // handle shortening
468 : sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
469 0 : nHyphenationPosAltHyph : nHyphenationPos);
470 : // dicretionary hyphenation
471 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
472 0 : aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
473 0 : (sal_Int16) nHyphenationPosAltHyph);
474 : }
475 : else
476 : {
477 16 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
478 8 : (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
479 : }
480 : }
481 :
482 5235 : if (rep)
483 : {
484 0 : for(int j = 0; j < n; j++)
485 : {
486 0 : if (rep[j]) free(rep[j]);
487 : }
488 0 : free(rep);
489 : }
490 5235 : if (pos) free(pos);
491 5235 : if (cut) free(cut);
492 10470 : return xRes;
493 : }
494 0 : return NULL;
495 : }
496 :
497 0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
498 : const OUString& aWord,
499 : const ::com::sun::star::lang::Locale& aLocale,
500 : sal_Int16 nIndex,
501 : const ::com::sun::star::beans::PropertyValues& aProperties )
502 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
503 : {
504 : // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
505 0 : for (int extrachar = 1; extrachar <= 2; extrachar++)
506 : {
507 0 : Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
508 0 : if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
509 0 : return xRes;
510 0 : }
511 0 : return NULL;
512 : }
513 :
#if defined(WNT)
// Converts rLongPathName to its short path form via GetShortPathNameW and
// returns it in the thread text encoding. An empty string is returned if the
// result does not fit into the internal 1024 character buffer.
static OString Win_GetShortPathName( const OUString &rLongPathName )
{
    sal_Unicode aShortBuffer[1024] = {0};
    const sal_Int32 nCapacity = SAL_N_ELEMENTS( aShortBuffer );

    // use the version of 'GetShortPathName' that can deal with Unicode...
    const sal_Int32 nWritten = GetShortPathNameW(
            reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
            reinterpret_cast<LPWSTR>( aShortBuffer ),
            nCapacity );

    if (nWritten >= nCapacity)  // result did not fit
    {
        OSL_FAIL( "Win_GetShortPathName: buffer to short" );
        return OString();
    }

    return OString( OU2ENC( OUString( aShortBuffer, nWritten ), osl_getThreadTextEncoding()) );
}
#endif //defined(WNT)
536 :
537 0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
538 : const ::com::sun::star::lang::Locale& aLocale,
539 : const ::com::sun::star::beans::PropertyValues& aProperties )
540 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
541 : {
542 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
543 0 : rHelper.SetTmpPropVals(aProperties);
544 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
545 0 : sal_Int16 minLead = rHelper.GetMinLeading();
546 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
547 :
548 : // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
549 : // well as "hyphenate"
550 0 : if (aWord.getLength() < minLen)
551 : {
552 0 : return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ),
553 0 : aWord, Sequence< sal_Int16 >() );
554 : }
555 :
556 0 : int k = -1;
557 0 : for (int j = 0; j < numdict; j++)
558 : {
559 0 : if (aLocale == aDicts[j].aLoc) k = j;
560 : }
561 :
562 : // if we have a hyphenation dictionary matching this locale
563 0 : if (k != -1)
564 : {
565 0 : HyphenDict *dict = NULL;
566 : // if this dictioanry has not been loaded yet do that
567 0 : if (!aDicts[k].aPtr)
568 : {
569 0 : OUString DictFN = aDicts[k].aName + ".dic";
570 0 : OUString dictpath;
571 :
572 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
573 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
574 :
575 : #if defined(WNT)
576 : // workaround for Windows specific problem that the
577 : // path length in calls to 'fopen' is limted to somewhat
578 : // about 120+ characters which will usually be exceed when
579 : // using dictionaries as extensions.
580 : sTmp = Win_GetShortPathName( dictpath );
581 : #endif
582 :
583 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
584 : {
585 0 : fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
586 0 : return NULL;
587 : }
588 0 : aDicts[k].aPtr = dict;
589 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
590 : }
591 :
592 : // other wise hyphenate the word with that dictionary
593 0 : dict = aDicts[k].aPtr;
594 0 : rtl_TextEncoding eEnc = aDicts[k].eEnc;
595 0 : CharClass* pCC = aDicts[k].apCC;
596 :
597 : // we don't want to work with a default text encoding since following incorrect
598 : // results may occur only for specific text and thus may be hard to notice.
599 : // Thus better always make a clean exit here if the text encoding is in question.
600 : // Hopefully something not working at all will raise proper attention quickly. ;-)
601 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
602 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
603 0 : return NULL;
604 :
605 : // first handle smart quotes both single and double
606 0 : OUStringBuffer rBuf(aWord);
607 0 : sal_Int32 nc = rBuf.getLength();
608 : sal_Unicode ch;
609 0 : for (sal_Int32 ix=0; ix < nc; ix++)
610 : {
611 0 : ch = rBuf[ix];
612 0 : if ((ch == 0x201C) || (ch == 0x201D))
613 0 : rBuf[ix] = (sal_Unicode)0x0022;
614 0 : if ((ch == 0x2018) || (ch == 0x2019))
615 0 : rBuf[ix] = (sal_Unicode)0x0027;
616 : }
617 0 : OUString nWord(rBuf.makeStringAndClear());
618 :
619 : // now convert word to all lowercase for pattern recognition
620 0 : OUString nTerm(makeLowerCase(nWord, pCC));
621 :
622 : // now convert word to needed encoding
623 0 : OString encWord(OU2ENC(nTerm,eEnc));
624 :
625 0 : int wordlen = encWord.getLength();
626 0 : boost::scoped_array<char> lcword(new char[wordlen+1]);
627 0 : boost::scoped_array<char> hyphens(new char[wordlen+5]);
628 0 : char ** rep = NULL; // replacements of discretionary hyphenation
629 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
630 0 : int * cut = NULL; // length of deletions in original word
631 :
632 : // copy converted word into simple char buffer
633 0 : strcpy(lcword.get(),encWord.getStr());
634 :
635 : // first remove any trailing periods
636 0 : int n = wordlen-1;
637 0 : while((n >=0) && (lcword[n] == '.'))
638 0 : n--;
639 0 : n++;
640 0 : if (n > 0)
641 : {
642 0 : const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), NULL,
643 : &rep, &pos, &cut, minLead, minTrail,
644 0 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
645 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
646 0 : if (bFailed)
647 : {
648 0 : if (rep)
649 : {
650 0 : for(int j = 0; j < n; j++)
651 : {
652 0 : if (rep[j]) free(rep[j]);
653 : }
654 0 : free(rep);
655 : }
656 0 : if (pos) free(pos);
657 0 : if (cut) free(cut);
658 :
659 0 : return NULL;
660 : }
661 : }
662 : // now backfill hyphens[] for any removed periods
663 0 : for (int c = n; c < wordlen; c++)
664 0 : hyphens[c] = '0';
665 0 : hyphens[wordlen] = '\0';
666 :
667 0 : sal_Int16 nHyphCount = 0;
668 : sal_Int16 i;
669 :
670 0 : for ( i = 0; i < encWord.getLength(); i++)
671 : {
672 0 : if (hyphens[i]&1)
673 0 : nHyphCount++;
674 : }
675 :
676 0 : Sequence< sal_Int16 > aHyphPos(nHyphCount);
677 0 : sal_Int16 *pPos = aHyphPos.getArray();
678 0 : OUStringBuffer hyphenatedWordBuffer;
679 0 : nHyphCount = 0;
680 :
681 0 : for (i = 0; i < nWord.getLength(); i++)
682 : {
683 0 : hyphenatedWordBuffer.append(aWord[i]);
684 : // hyphenation position
685 0 : if (hyphens[i]&1)
686 : {
687 0 : pPos[nHyphCount] = i;
688 0 : hyphenatedWordBuffer.append('=');
689 0 : nHyphCount++;
690 : }
691 : }
692 :
693 0 : OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
694 :
695 : Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
696 0 : aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
697 :
698 0 : if (rep)
699 : {
700 0 : for(int j = 0; j < n; j++)
701 : {
702 0 : if (rep[j]) free(rep[j]);
703 : }
704 0 : free(rep);
705 : }
706 0 : if (pos) free(pos);
707 0 : if (cut) free(cut);
708 :
709 0 : return xRes;
710 : }
711 :
712 0 : return NULL;
713 : }
714 :
715 5235 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
716 : {
717 5235 : if (pCC)
718 5235 : return pCC->lowercase(aTerm);
719 0 : return aTerm;
720 : }
721 :
722 0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
723 : {
724 0 : if (pCC)
725 0 : return pCC->uppercase(aTerm);
726 0 : return aTerm;
727 : }
728 :
729 0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
730 : {
731 0 : sal_Int32 tlen = aTerm.getLength();
732 0 : if ((pCC) && (tlen))
733 : {
734 0 : OUString bTemp = aTerm.copy(0,1);
735 0 : if (tlen > 1)
736 0 : return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
737 :
738 0 : return pCC->uppercase(bTemp, 0, 1);
739 : }
740 0 : return aTerm;
741 : }
742 :
743 43 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
744 : const Reference< XMultiServiceFactory > & /*rSMgr*/ )
745 : throw(Exception)
746 : {
747 43 : Reference< XInterface > xService = static_cast<cppu::OWeakObject*>(new Hyphenator);
748 43 : return xService;
749 : }
750 :
751 5 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
752 : const Reference< XLinguServiceEventListener >& rxLstnr )
753 : throw(RuntimeException, std::exception)
754 : {
755 5 : MutexGuard aGuard( GetLinguMutex() );
756 :
757 5 : bool bRes = false;
758 5 : if (!bDisposing && rxLstnr.is())
759 : {
760 5 : bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
761 : }
762 5 : return bRes;
763 : }
764 :
765 5 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
766 : const Reference< XLinguServiceEventListener >& rxLstnr )
767 : throw(RuntimeException, std::exception)
768 : {
769 5 : MutexGuard aGuard( GetLinguMutex() );
770 :
771 5 : bool bRes = false;
772 5 : if (!bDisposing && rxLstnr.is())
773 : {
774 0 : bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
775 : }
776 5 : return bRes;
777 : }
778 :
779 0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
780 : throw(RuntimeException, std::exception)
781 : {
782 0 : MutexGuard aGuard( GetLinguMutex() );
783 0 : return OUString( "Libhyphen Hyphenator" );
784 : }
785 :
786 0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
787 : throw(Exception, RuntimeException, std::exception)
788 : {
789 0 : MutexGuard aGuard( GetLinguMutex() );
790 :
791 0 : if (!pPropHelper)
792 : {
793 0 : sal_Int32 nLen = rArguments.getLength();
794 0 : if (2 == nLen)
795 : {
796 0 : Reference< XLinguProperties > xPropSet;
797 0 : rArguments.getConstArray()[0] >>= xPropSet;
798 : // rArguments.getConstArray()[1] >>= xDicList;
799 :
800 : //! Pointer allows for access of the non-UNO functions.
801 : //! And the reference to the UNO-functions while increasing
802 : //! the ref-count and will implicitly free the memory
803 : //! when the object is not longer used.
804 0 : pPropHelper = new PropertyHelper_Hyphenation( static_cast<XHyphenator *>(this), xPropSet );
805 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
806 : }
807 : else {
808 : OSL_FAIL( "wrong number of arguments in sequence" );
809 : }
810 0 : }
811 0 : }
812 :
813 44 : void SAL_CALL Hyphenator::dispose()
814 : throw(RuntimeException, std::exception)
815 : {
816 44 : MutexGuard aGuard( GetLinguMutex() );
817 :
818 44 : if (!bDisposing)
819 : {
820 43 : bDisposing = true;
821 43 : EventObject aEvtObj( static_cast<XHyphenator *>(this) );
822 43 : aEvtListeners.disposeAndClear( aEvtObj );
823 43 : if (pPropHelper)
824 : {
825 5 : pPropHelper->RemoveAsPropListener();
826 5 : delete pPropHelper;
827 5 : pPropHelper = NULL;
828 43 : }
829 44 : }
830 44 : }
831 :
832 0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
833 : throw(RuntimeException, std::exception)
834 : {
835 0 : MutexGuard aGuard( GetLinguMutex() );
836 :
837 0 : if (!bDisposing && rxListener.is())
838 0 : aEvtListeners.addInterface( rxListener );
839 0 : }
840 :
841 0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
842 : throw(RuntimeException, std::exception)
843 : {
844 0 : MutexGuard aGuard( GetLinguMutex() );
845 :
846 0 : if (!bDisposing && rxListener.is())
847 0 : aEvtListeners.removeInterface( rxListener );
848 0 : }
849 :
850 : // Service specific part
851 44 : OUString SAL_CALL Hyphenator::getImplementationName()
852 : throw(RuntimeException, std::exception)
853 : {
854 44 : MutexGuard aGuard( GetLinguMutex() );
855 :
856 44 : return getImplementationName_Static();
857 : }
858 :
859 0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
860 : throw(RuntimeException, std::exception)
861 : {
862 0 : return cppu::supportsService(this, ServiceName);
863 : }
864 :
865 1 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
866 : throw(RuntimeException, std::exception)
867 : {
868 1 : MutexGuard aGuard( GetLinguMutex() );
869 :
870 1 : return getSupportedServiceNames_Static();
871 : }
872 :
873 44 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
874 : throw()
875 : {
876 44 : MutexGuard aGuard( GetLinguMutex() );
877 :
878 44 : Sequence< OUString > aSNS( 1 ); // more than 1 service is possible, too
879 44 : aSNS.getArray()[0] = SN_HYPHENATOR;
880 44 : return aSNS;
881 : }
882 :
883 43 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
884 : XMultiServiceFactory * pServiceManager, void * )
885 : {
886 43 : void * pRet = 0;
887 43 : if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) )
888 : {
889 : Reference< XSingleServiceFactory > xFactory =
890 : cppu::createOneInstanceFactory(
891 : pServiceManager,
892 : Hyphenator::getImplementationName_Static(),
893 : Hyphenator_CreateInstance,
894 43 : Hyphenator::getSupportedServiceNames_Static());
895 : // acquire, because we return an interface pointer instead of a reference
896 43 : xFactory->acquire();
897 43 : pRet = xFactory.get();
898 : }
899 43 : return pRet;
900 : }
901 :
902 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|