Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #if defined(WNT)
21 : #include <prewin.h>
22 : #include <postwin.h>
23 : #endif
24 :
25 : #include <com/sun/star/uno/Reference.h>
26 : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
27 :
28 : #include <cppuhelper/factory.hxx>
29 : #include <cppuhelper/supportsservice.hxx>
30 : #include <com/sun/star/registry/XRegistryKey.hpp>
31 : #include <i18nlangtag/languagetag.hxx>
32 : #include <tools/debug.hxx>
33 : #include <osl/mutex.hxx>
34 :
35 : #include <hyphen.h>
36 : #include <hyphenimp.hxx>
37 :
38 : #include <linguistic/hyphdta.hxx>
39 : #include <rtl/ustring.hxx>
40 : #include <rtl/ustrbuf.hxx>
41 : #include <rtl/textenc.h>
42 :
43 : #include <linguistic/lngprops.hxx>
44 : #include <linguistic/misc.hxx>
45 : #include <unotools/pathoptions.hxx>
46 : #include <unotools/useroptions.hxx>
47 : #include <unotools/lingucfg.hxx>
48 : #include <osl/file.hxx>
49 :
50 : #include <stdio.h>
51 : #include <string.h>
52 :
53 : #include <list>
54 : #include <set>
55 : #include <boost/scoped_array.hpp>
56 :
57 : using namespace utl;
58 : using namespace osl;
59 : using namespace com::sun::star;
60 : using namespace com::sun::star::beans;
61 : using namespace com::sun::star::lang;
62 : using namespace com::sun::star::uno;
63 : using namespace com::sun::star::linguistic2;
64 : using namespace linguistic;
65 :
// Max(a,b): yields the larger of the two arguments.
// Both arguments are fully parenthesized so that an argument containing an
// operator of lower precedence than '>' (e.g. '&', '|', '?:') cannot regroup
// the comparison. Each argument may still be evaluated twice, so do not pass
// expressions with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
68 :
69 44 : Hyphenator::Hyphenator() :
70 44 : aEvtListeners ( GetLinguMutex() )
71 : {
72 44 : bDisposing = false;
73 44 : pPropHelper = NULL;
74 44 : aDicts = NULL;
75 44 : numdict = 0;
76 44 : }
77 :
78 126 : Hyphenator::~Hyphenator()
79 : {
80 42 : if (numdict && aDicts)
81 : {
82 1008 : for (int i=0; i < numdict; ++i)
83 : {
84 966 : delete aDicts[i].apCC;
85 966 : if (aDicts[i].aPtr)
86 8 : hnj_hyphen_free(aDicts[i].aPtr);
87 : }
88 : }
89 42 : delete[] aDicts;
90 :
91 42 : if (pPropHelper)
92 : {
93 0 : pPropHelper->RemoveAsPropListener();
94 0 : delete pPropHelper;
95 : }
96 84 : }
97 :
98 10 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
99 : {
100 10 : if (!pPropHelper)
101 : {
102 10 : Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
103 :
104 10 : pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
105 10 : pPropHelper->AddAsPropListener(); //! after a reference is established
106 : }
107 10 : return *pPropHelper;
108 : }
109 :
110 88 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
111 : throw(RuntimeException, std::exception)
112 : {
113 88 : MutexGuard aGuard( GetLinguMutex() );
114 :
115 : // this routine should return the locales supported by the installed
116 : // dictionaries.
117 88 : if (!numdict)
118 : {
119 44 : SvtLinguConfig aLinguCfg;
120 :
121 : // get list of dictionaries-to-use
122 : // (or better speaking: the list of dictionaries using the
123 : // new configuration entries).
124 88 : std::list< SvtLinguConfigDictionaryEntry > aDics;
125 88 : uno::Sequence< OUString > aFormatList;
126 : aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators",
127 44 : "org.openoffice.lingu.LibHnjHyphenator", aFormatList );
128 44 : sal_Int32 nLen = aFormatList.getLength();
129 88 : for (sal_Int32 i = 0; i < nLen; ++i)
130 : {
131 : std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
132 44 : aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
133 44 : aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
134 44 : }
135 :
136 : //!! for compatibility with old dictionaries (the ones not using extensions
137 : //!! or new configuration entries, but still using the dictionary.lst file)
138 : //!! Get the list of old style spell checking dictionaries to use...
139 : std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
140 88 : GetOldStyleDics( "HYPH" ) );
141 :
142 : // to prefer dictionaries with configuration entries we will only
143 : // use those old style dictionaries that add a language that
144 : // is not yet supported by the list od new style dictionaries
145 44 : MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
146 :
147 44 : numdict = aDics.size();
148 44 : if (numdict)
149 : {
150 : // get supported locales from the dictionaries-to-use...
151 44 : sal_Int32 k = 0;
152 44 : std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
153 44 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
154 1056 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
155 : {
156 1012 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
157 1012 : sal_Int32 nLen2 = aLocaleNames.getLength();
158 2024 : for (k = 0; k < nLen2; ++k)
159 : {
160 1012 : aLocaleNamesSet.insert( aLocaleNames[k] );
161 : }
162 1012 : }
163 : // ... and add them to the resulting sequence
164 44 : aSuppLocales.realloc( aLocaleNamesSet.size() );
165 44 : std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
166 44 : k = 0;
167 1056 : for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
168 : {
169 1012 : Locale aTmp( LanguageTag::convertToLocale( *aItB ));
170 1012 : aSuppLocales[k++] = aTmp;
171 1012 : }
172 :
173 : //! For each dictionary and each locale we need a separate entry.
174 : //! If this results in more than one dictionary per locale than (for now)
175 : //! it is undefined which dictionary gets used.
176 : //! In the future the implementation should support using several dictionaries
177 : //! for one locale.
178 44 : numdict = 0;
179 1056 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
180 1012 : numdict = numdict + aDictIt->aLocaleNames.getLength();
181 :
182 : // add dictionary information
183 44 : aDicts = new HDInfo[numdict];
184 :
185 44 : k = 0;
186 1056 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
187 : {
188 2024 : if (aDictIt->aLocaleNames.getLength() > 0 &&
189 1012 : aDictIt->aLocations.getLength() > 0)
190 : {
191 1012 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
192 1012 : sal_Int32 nLocales = aLocaleNames.getLength();
193 :
194 : // currently only one language per dictionary is supported in the actual implementation...
195 : // Thus here we work-around this by adding the same dictionary several times.
196 : // Once for each of it's supported locales.
197 2024 : for (sal_Int32 i = 0; i < nLocales; ++i)
198 : {
199 1012 : LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
200 1012 : aDicts[k].aPtr = NULL;
201 1012 : aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
202 1012 : aDicts[k].aLoc = aLanguageTag.getLocale();
203 1012 : aDicts[k].apCC = new CharClass( aLanguageTag );
204 : // also both files have to be in the same directory and the
205 : // file names must only differ in the extension (.aff/.dic).
206 : // Thus we use the first location only and strip the extension part.
207 2024 : OUString aLocation = aDictIt->aLocations[0];
208 1012 : sal_Int32 nPos = aLocation.lastIndexOf( '.' );
209 1012 : aLocation = aLocation.copy( 0, nPos );
210 1012 : aDicts[k].aName = aLocation;
211 :
212 1012 : ++k;
213 2024 : }
214 : }
215 : }
216 44 : DBG_ASSERT( k == numdict, "index mismatch?" );
217 : }
218 : else
219 : {
220 : // no dictionary found so register no dictionaries
221 0 : numdict = 0;
222 0 : aDicts = NULL;
223 0 : aSuppLocales.realloc(0);
224 44 : }
225 : }
226 :
227 88 : return aSuppLocales;
228 : }
229 :
230 21612 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
231 : throw(RuntimeException, std::exception)
232 : {
233 21612 : MutexGuard aGuard( GetLinguMutex() );
234 :
235 21612 : bool bRes = false;
236 21612 : if (!aSuppLocales.getLength())
237 0 : getLocales();
238 :
239 21612 : const Locale *pLocale = aSuppLocales.getConstArray();
240 21612 : sal_Int32 nLen = aSuppLocales.getLength();
241 432240 : for (sal_Int32 i = 0; i < nLen; ++i)
242 : {
243 432240 : if (rLocale == pLocale[i])
244 : {
245 21612 : bRes = true;
246 21612 : break;
247 : }
248 : }
249 21612 : return bRes;
250 : }
251 :
252 21602 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
253 : const ::com::sun::star::lang::Locale& aLocale,
254 : sal_Int16 nMaxLeading,
255 : const ::com::sun::star::beans::PropertyValues& aProperties )
256 : throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException, std::exception)
257 : {
258 21602 : int nHyphenationPos = -1;
259 21602 : int nHyphenationPosAlt = -1;
260 21602 : int nHyphenationPosAltHyph = -1;
261 : int wordlen;
262 21602 : int k = 0;
263 :
264 21602 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
265 21602 : rHelper.SetTmpPropVals(aProperties);
266 21602 : sal_Int16 minTrail = rHelper.GetMinTrailing();
267 21602 : sal_Int16 minLead = rHelper.GetMinLeading();
268 21602 : sal_Int16 minLen = rHelper.GetMinWordLength();
269 :
270 21602 : HyphenDict *dict = NULL;
271 21602 : rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
272 21602 : CharClass * pCC = NULL;
273 :
274 21602 : Reference< XHyphenatedWord > xRes;
275 :
276 21602 : k = -1;
277 518448 : for (int j = 0; j < numdict; j++)
278 : {
279 496846 : if (aLocale == aDicts[j].aLoc)
280 21602 : k = j;
281 : }
282 :
283 : // if we have a hyphenation dictionary matching this locale
284 21602 : if (k != -1)
285 : {
286 : // if this dictinary has not been loaded yet do that
287 21602 : if (!aDicts[k].aPtr)
288 : {
289 10 : OUString DictFN = aDicts[k].aName + ".dic";
290 20 : OUString dictpath;
291 :
292 10 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
293 :
294 : #if defined(WNT)
295 : // Hyphen waits UTF-8 encoded paths with \\?\ long path prefix.
296 : OString sTmp = Win_AddLongPathPrefix(OUStringToOString(dictpath, RTL_TEXTENCODING_UTF8));
297 : #else
298 20 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
299 : #endif
300 :
301 10 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
302 : {
303 0 : fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
304 0 : return NULL;
305 : }
306 10 : aDicts[k].aPtr = dict;
307 20 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
308 : }
309 :
310 : // other wise hyphenate the word with that dictionary
311 21602 : dict = aDicts[k].aPtr;
312 21602 : eEnc = aDicts[k].eEnc;
313 21602 : pCC = aDicts[k].apCC;
314 :
315 : // we don't want to work with a default text encoding since following incorrect
316 : // results may occur only for specific text and thus may be hard to notice.
317 : // Thus better always make a clean exit here if the text encoding is in question.
318 : // Hopefully something not working at all will raise proper attention quickly. ;-)
319 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
320 21602 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
321 0 : return NULL;
322 :
323 21602 : sal_uInt16 ct = capitalType(aWord, pCC);
324 :
325 : // first convert any smart quotes or apostrophes to normal ones
326 21602 : OUStringBuffer rBuf(aWord);
327 21602 : sal_Int32 nc = rBuf.getLength();
328 : sal_Unicode ch;
329 113654 : for (sal_Int32 ix=0; ix < nc; ix++)
330 : {
331 92052 : ch = rBuf[ix];
332 92052 : if ((ch == 0x201C) || (ch == 0x201D))
333 0 : rBuf[ix] = (sal_Unicode)0x0022;
334 92052 : if ((ch == 0x2018) || (ch == 0x2019))
335 0 : rBuf[ix] = (sal_Unicode)0x0027;
336 : }
337 43204 : OUString nWord(rBuf.makeStringAndClear());
338 :
339 : // now convert word to all lowercase for pattern recognition
340 43204 : OUString nTerm(makeLowerCase(nWord, pCC));
341 :
342 : // now convert word to needed encoding
343 43204 : OString encWord(OU2ENC(nTerm,eEnc));
344 :
345 21602 : wordlen = encWord.getLength();
346 43204 : boost::scoped_array<char> lcword(new char[wordlen + 1]);
347 43204 : boost::scoped_array<char> hyphens(new char[wordlen + 5]);
348 :
349 21602 : char ** rep = NULL; // replacements of discretionary hyphenation
350 21602 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
351 21602 : int * cut = NULL; // length of deletions in original word
352 :
353 : // copy converted word into simple char buffer
354 21602 : strcpy(lcword.get(),encWord.getStr());
355 :
356 : // now strip off any ending periods
357 21602 : int n = wordlen-1;
358 43204 : while((n >=0) && (lcword[n] == '.'))
359 0 : n--;
360 21602 : n++;
361 21602 : if (n > 0)
362 : {
363 21602 : const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword.get(), n, hyphens.get(), NULL,
364 : &rep, &pos, &cut, minLead, minTrail,
365 21602 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
366 64806 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
367 21602 : if (bFailed)
368 : {
369 : // whoops something did not work
370 0 : if (rep)
371 : {
372 0 : for(int j = 0; j < n; j++)
373 : {
374 0 : if (rep[j]) free(rep[j]);
375 : }
376 0 : free(rep);
377 : }
378 0 : if (pos) free(pos);
379 0 : if (cut) free(cut);
380 0 : return NULL;
381 : }
382 : }
383 :
384 : // now backfill hyphens[] for any removed trailing periods
385 21602 : for (int c = n; c < wordlen; c++) hyphens[c] = '0';
386 21602 : hyphens[wordlen] = '\0';
387 :
388 21602 : sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
389 :
390 113654 : for (sal_Int32 i = 0; i < n; i++)
391 : {
392 92052 : int leftrep = 0;
393 92052 : bool hit = (n >= minLen);
394 92052 : if (!rep || !rep[i] || (i >= n))
395 : {
396 92052 : hit = hit && (hyphens[i]&1) && (i < Leading);
397 92052 : hit = hit && (i >= (minLead-1) );
398 92052 : hit = hit && ((n - i - 1) >= minTrail);
399 : }
400 : else
401 : {
402 : // calculate change character length before hyphenation point signed with '='
403 0 : for (char * c = rep[i]; *c && (*c != '='); c++)
404 : {
405 0 : if (eEnc == RTL_TEXTENCODING_UTF8)
406 : {
407 0 : if (((unsigned char) *c) >> 6 != 2)
408 0 : leftrep++;
409 : }
410 : else
411 0 : leftrep++;
412 : }
413 0 : hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
414 0 : hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
415 0 : hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
416 : }
417 92052 : if (hit)
418 : {
419 593 : nHyphenationPos = i;
420 593 : if (rep && (i < n) && rep[i])
421 : {
422 0 : nHyphenationPosAlt = i - pos[i];
423 0 : nHyphenationPosAltHyph = i + leftrep - pos[i];
424 : }
425 : }
426 : }
427 :
428 21602 : if (nHyphenationPos == -1)
429 : {
430 21009 : xRes = NULL;
431 : }
432 : else
433 : {
434 593 : if (rep && rep[nHyphenationPos])
435 : {
436 : // remove equal sign
437 0 : char * s = rep[nHyphenationPos];
438 0 : int eq = 0;
439 0 : for (; *s; s++)
440 : {
441 0 : if (*s == '=') eq = 1;
442 0 : if (eq) *s = *(s + 1);
443 : }
444 0 : OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
445 0 : OUString repHyph;
446 0 : switch (ct)
447 : {
448 : case CAPTYPE_ALLCAP:
449 : {
450 0 : repHyph = makeUpperCase(repHyphlow, pCC);
451 0 : break;
452 : }
453 : case CAPTYPE_INITCAP:
454 : {
455 0 : if (nHyphenationPosAlt == -1)
456 0 : repHyph = makeInitCap(repHyphlow, pCC);
457 : else
458 0 : repHyph = repHyphlow;
459 0 : break;
460 : }
461 : default:
462 : {
463 0 : repHyph = repHyphlow;
464 0 : break;
465 : }
466 : }
467 :
468 : // handle shortening
469 : sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
470 0 : nHyphenationPosAltHyph : nHyphenationPos);
471 : // dicretionary hyphenation
472 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
473 0 : aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
474 0 : (sal_Int16) nHyphenationPosAltHyph);
475 : }
476 : else
477 : {
478 1186 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
479 593 : (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
480 : }
481 : }
482 :
483 21602 : if (rep)
484 : {
485 0 : for(int j = 0; j < n; j++)
486 : {
487 0 : if (rep[j]) free(rep[j]);
488 : }
489 0 : free(rep);
490 : }
491 21602 : if (pos) free(pos);
492 21602 : if (cut) free(cut);
493 43204 : return xRes;
494 : }
495 0 : return NULL;
496 : }
497 :
498 0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
499 : const OUString& aWord,
500 : const ::com::sun::star::lang::Locale& aLocale,
501 : sal_Int16 nIndex,
502 : const ::com::sun::star::beans::PropertyValues& aProperties )
503 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
504 : {
505 : // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
506 0 : for (int extrachar = 1; extrachar <= 2; extrachar++)
507 : {
508 0 : Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
509 0 : if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
510 0 : return xRes;
511 0 : }
512 0 : return NULL;
513 : }
514 :
515 : #if defined(WNT)
516 : static OString Win_GetShortPathName( const OUString &rLongPathName )
517 : {
518 : OString aRes;
519 :
520 : sal_Unicode aShortBuffer[1024] = {0};
521 : sal_Int32 nShortBufSize = SAL_N_ELEMENTS( aShortBuffer );
522 :
523 : // use the version of 'GetShortPathName' that can deal with Unicode...
524 : sal_Int32 nShortLen = GetShortPathNameW(
525 : reinterpret_cast<LPCWSTR>( rLongPathName.getStr() ),
526 : reinterpret_cast<LPWSTR>( aShortBuffer ),
527 : nShortBufSize );
528 :
529 : if (nShortLen < nShortBufSize) // conversion successful?
530 : aRes = OString( OU2ENC( OUString( aShortBuffer, nShortLen ), osl_getThreadTextEncoding()) );
531 : else
532 : OSL_FAIL( "Win_GetShortPathName: buffer to short" );
533 :
534 : return aRes;
535 : }
536 : #endif //defined(WNT)
537 :
538 0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
539 : const ::com::sun::star::lang::Locale& aLocale,
540 : const ::com::sun::star::beans::PropertyValues& aProperties )
541 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
542 : {
543 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
544 0 : rHelper.SetTmpPropVals(aProperties);
545 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
546 0 : sal_Int16 minLead = rHelper.GetMinLeading();
547 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
548 :
549 : // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
550 : // well as "hyphenate"
551 0 : if (aWord.getLength() < minLen)
552 : {
553 0 : return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ),
554 0 : aWord, Sequence< sal_Int16 >() );
555 : }
556 :
557 0 : int k = -1;
558 0 : for (int j = 0; j < numdict; j++)
559 : {
560 0 : if (aLocale == aDicts[j].aLoc) k = j;
561 : }
562 :
563 : // if we have a hyphenation dictionary matching this locale
564 0 : if (k != -1)
565 : {
566 0 : HyphenDict *dict = NULL;
567 : // if this dictioanry has not been loaded yet do that
568 0 : if (!aDicts[k].aPtr)
569 : {
570 0 : OUString DictFN = aDicts[k].aName + ".dic";
571 0 : OUString dictpath;
572 :
573 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
574 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
575 :
576 : #if defined(WNT)
577 : // workaround for Windows specific problem that the
578 : // path length in calls to 'fopen' is limted to somewhat
579 : // about 120+ characters which will usually be exceed when
580 : // using dictionaries as extensions.
581 : sTmp = Win_GetShortPathName( dictpath );
582 : #endif
583 :
584 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
585 : {
586 0 : fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
587 0 : return NULL;
588 : }
589 0 : aDicts[k].aPtr = dict;
590 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
591 : }
592 :
593 : // other wise hyphenate the word with that dictionary
594 0 : dict = aDicts[k].aPtr;
595 0 : rtl_TextEncoding eEnc = aDicts[k].eEnc;
596 0 : CharClass* pCC = aDicts[k].apCC;
597 :
598 : // we don't want to work with a default text encoding since following incorrect
599 : // results may occur only for specific text and thus may be hard to notice.
600 : // Thus better always make a clean exit here if the text encoding is in question.
601 : // Hopefully something not working at all will raise proper attention quickly. ;-)
602 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
603 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
604 0 : return NULL;
605 :
606 : // first handle smart quotes both single and double
607 0 : OUStringBuffer rBuf(aWord);
608 0 : sal_Int32 nc = rBuf.getLength();
609 : sal_Unicode ch;
610 0 : for (sal_Int32 ix=0; ix < nc; ix++)
611 : {
612 0 : ch = rBuf[ix];
613 0 : if ((ch == 0x201C) || (ch == 0x201D))
614 0 : rBuf[ix] = (sal_Unicode)0x0022;
615 0 : if ((ch == 0x2018) || (ch == 0x2019))
616 0 : rBuf[ix] = (sal_Unicode)0x0027;
617 : }
618 0 : OUString nWord(rBuf.makeStringAndClear());
619 :
620 : // now convert word to all lowercase for pattern recognition
621 0 : OUString nTerm(makeLowerCase(nWord, pCC));
622 :
623 : // now convert word to needed encoding
624 0 : OString encWord(OU2ENC(nTerm,eEnc));
625 :
626 0 : int wordlen = encWord.getLength();
627 0 : boost::scoped_array<char> lcword(new char[wordlen+1]);
628 0 : boost::scoped_array<char> hyphens(new char[wordlen+5]);
629 0 : char ** rep = NULL; // replacements of discretionary hyphenation
630 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
631 0 : int * cut = NULL; // length of deletions in original word
632 :
633 : // copy converted word into simple char buffer
634 0 : strcpy(lcword.get(),encWord.getStr());
635 :
636 : // first remove any trailing periods
637 0 : int n = wordlen-1;
638 0 : while((n >=0) && (lcword[n] == '.'))
639 0 : n--;
640 0 : n++;
641 0 : if (n > 0)
642 : {
643 0 : const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword.get(), n, hyphens.get(), NULL,
644 : &rep, &pos, &cut, minLead, minTrail,
645 0 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
646 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
647 0 : if (bFailed)
648 : {
649 0 : if (rep)
650 : {
651 0 : for(int j = 0; j < n; j++)
652 : {
653 0 : if (rep[j]) free(rep[j]);
654 : }
655 0 : free(rep);
656 : }
657 0 : if (pos) free(pos);
658 0 : if (cut) free(cut);
659 :
660 0 : return NULL;
661 : }
662 : }
663 : // now backfill hyphens[] for any removed periods
664 0 : for (int c = n; c < wordlen; c++)
665 0 : hyphens[c] = '0';
666 0 : hyphens[wordlen] = '\0';
667 :
668 0 : sal_Int16 nHyphCount = 0;
669 : sal_Int16 i;
670 :
671 0 : for ( i = 0; i < encWord.getLength(); i++)
672 : {
673 0 : if (hyphens[i]&1)
674 0 : nHyphCount++;
675 : }
676 :
677 0 : Sequence< sal_Int16 > aHyphPos(nHyphCount);
678 0 : sal_Int16 *pPos = aHyphPos.getArray();
679 0 : OUStringBuffer hyphenatedWordBuffer;
680 0 : nHyphCount = 0;
681 :
682 0 : for (i = 0; i < nWord.getLength(); i++)
683 : {
684 0 : hyphenatedWordBuffer.append(aWord[i]);
685 : // hyphenation position
686 0 : if (hyphens[i]&1)
687 : {
688 0 : pPos[nHyphCount] = i;
689 0 : hyphenatedWordBuffer.append('=');
690 0 : nHyphCount++;
691 : }
692 : }
693 :
694 0 : OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
695 :
696 : Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
697 0 : aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
698 :
699 0 : if (rep)
700 : {
701 0 : for(int j = 0; j < n; j++)
702 : {
703 0 : if (rep[j]) free(rep[j]);
704 : }
705 0 : free(rep);
706 : }
707 0 : if (pos) free(pos);
708 0 : if (cut) free(cut);
709 :
710 0 : return xRes;
711 : }
712 :
713 0 : return NULL;
714 : }
715 :
716 21602 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
717 : {
718 21602 : if (pCC)
719 21602 : return pCC->lowercase(aTerm);
720 0 : return aTerm;
721 : }
722 :
723 0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
724 : {
725 0 : if (pCC)
726 0 : return pCC->uppercase(aTerm);
727 0 : return aTerm;
728 : }
729 :
730 0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
731 : {
732 0 : sal_Int32 tlen = aTerm.getLength();
733 0 : if ((pCC) && (tlen))
734 : {
735 0 : OUString bTemp = aTerm.copy(0,1);
736 0 : if (tlen > 1)
737 0 : return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
738 :
739 0 : return pCC->uppercase(bTemp, 0, 1);
740 : }
741 0 : return aTerm;
742 : }
743 :
744 44 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
745 : const Reference< XMultiServiceFactory > & /*rSMgr*/ )
746 : throw(Exception)
747 : {
748 44 : Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
749 44 : return xService;
750 : }
751 :
752 10 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
753 : const Reference< XLinguServiceEventListener >& rxLstnr )
754 : throw(RuntimeException, std::exception)
755 : {
756 10 : MutexGuard aGuard( GetLinguMutex() );
757 :
758 10 : bool bRes = false;
759 10 : if (!bDisposing && rxLstnr.is())
760 : {
761 10 : bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
762 : }
763 10 : return bRes;
764 : }
765 :
766 10 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
767 : const Reference< XLinguServiceEventListener >& rxLstnr )
768 : throw(RuntimeException, std::exception)
769 : {
770 10 : MutexGuard aGuard( GetLinguMutex() );
771 :
772 10 : bool bRes = false;
773 10 : if (!bDisposing && rxLstnr.is())
774 : {
775 0 : bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
776 : }
777 10 : return bRes;
778 : }
779 :
780 0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
781 : throw(RuntimeException, std::exception)
782 : {
783 0 : MutexGuard aGuard( GetLinguMutex() );
784 0 : return OUString( "Libhyphen Hyphenator" );
785 : }
786 :
787 0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
788 : throw(Exception, RuntimeException, std::exception)
789 : {
790 0 : MutexGuard aGuard( GetLinguMutex() );
791 :
792 0 : if (!pPropHelper)
793 : {
794 0 : sal_Int32 nLen = rArguments.getLength();
795 0 : if (2 == nLen)
796 : {
797 0 : Reference< XLinguProperties > xPropSet;
798 0 : rArguments.getConstArray()[0] >>= xPropSet;
799 : // rArguments.getConstArray()[1] >>= xDicList;
800 :
801 : //! Pointer allows for access of the non-UNO functions.
802 : //! And the reference to the UNO-functions while increasing
803 : //! the ref-count and will implicitly free the memory
804 : //! when the object is not longer used.
805 0 : pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
806 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
807 : }
808 : else {
809 : OSL_FAIL( "wrong number of arguments in sequence" );
810 : }
811 0 : }
812 0 : }
813 :
814 44 : void SAL_CALL Hyphenator::dispose()
815 : throw(RuntimeException, std::exception)
816 : {
817 44 : MutexGuard aGuard( GetLinguMutex() );
818 :
819 44 : if (!bDisposing)
820 : {
821 44 : bDisposing = true;
822 44 : EventObject aEvtObj( (XHyphenator *) this );
823 44 : aEvtListeners.disposeAndClear( aEvtObj );
824 44 : if (pPropHelper)
825 : {
826 10 : pPropHelper->RemoveAsPropListener();
827 10 : delete pPropHelper;
828 10 : pPropHelper = NULL;
829 44 : }
830 44 : }
831 44 : }
832 :
833 0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
834 : throw(RuntimeException, std::exception)
835 : {
836 0 : MutexGuard aGuard( GetLinguMutex() );
837 :
838 0 : if (!bDisposing && rxListener.is())
839 0 : aEvtListeners.addInterface( rxListener );
840 0 : }
841 :
842 0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
843 : throw(RuntimeException, std::exception)
844 : {
845 0 : MutexGuard aGuard( GetLinguMutex() );
846 :
847 0 : if (!bDisposing && rxListener.is())
848 0 : aEvtListeners.removeInterface( rxListener );
849 0 : }
850 :
851 : // Service specific part
852 44 : OUString SAL_CALL Hyphenator::getImplementationName()
853 : throw(RuntimeException, std::exception)
854 : {
855 44 : MutexGuard aGuard( GetLinguMutex() );
856 :
857 44 : return getImplementationName_Static();
858 : }
859 :
860 0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
861 : throw(RuntimeException, std::exception)
862 : {
863 0 : return cppu::supportsService(this, ServiceName);
864 : }
865 :
866 0 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
867 : throw(RuntimeException, std::exception)
868 : {
869 0 : MutexGuard aGuard( GetLinguMutex() );
870 :
871 0 : return getSupportedServiceNames_Static();
872 : }
873 :
874 44 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
875 : throw()
876 : {
877 44 : MutexGuard aGuard( GetLinguMutex() );
878 :
879 44 : Sequence< OUString > aSNS( 1 ); // more than 1 service is possible, too
880 44 : aSNS.getArray()[0] = SN_HYPHENATOR;
881 44 : return aSNS;
882 : }
883 :
884 44 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
885 : XMultiServiceFactory * pServiceManager, void * )
886 : {
887 44 : void * pRet = 0;
888 44 : if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) )
889 : {
890 : Reference< XSingleServiceFactory > xFactory =
891 : cppu::createOneInstanceFactory(
892 : pServiceManager,
893 : Hyphenator::getImplementationName_Static(),
894 : Hyphenator_CreateInstance,
895 44 : Hyphenator::getSupportedServiceNames_Static());
896 : // acquire, because we return an interface pointer instead of a reference
897 44 : xFactory->acquire();
898 44 : pRet = xFactory.get();
899 : }
900 44 : return pRet;
901 : }
902 :
903 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|