Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/uno/Reference.h>
21 : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
22 :
23 : #include <cppuhelper/factory.hxx> // helper for factories
24 : #include <com/sun/star/registry/XRegistryKey.hpp>
25 : #include <i18nlangtag/languagetag.hxx>
26 : #include <tools/debug.hxx>
27 : #include <osl/mutex.hxx>
28 :
29 : #include <hyphen.h>
30 : #include <hyphenimp.hxx>
31 :
32 : #include <linguistic/hyphdta.hxx>
33 : #include <rtl/ustring.hxx>
34 : #include <rtl/ustrbuf.hxx>
35 : #include <rtl/textenc.h>
36 :
37 : #include <linguistic/lngprops.hxx>
38 : #include <linguistic/misc.hxx>
39 : #include <unotools/pathoptions.hxx>
40 : #include <unotools/useroptions.hxx>
41 : #include <unotools/lingucfg.hxx>
42 : #include <osl/file.hxx>
43 :
#include <stdio.h>
#include <string.h>

#include <list>
#include <set>
#include <vector>
49 :
50 : using namespace utl;
51 : using namespace osl;
52 : using namespace com::sun::star;
53 : using namespace com::sun::star::beans;
54 : using namespace com::sun::star::lang;
55 : using namespace com::sun::star::uno;
56 : using namespace com::sun::star::linguistic2;
57 : using namespace linguistic;
58 :
59 :
// Max(a,b): yields the larger of its two arguments.
// Both arguments are fully parenthesised so that operands containing
// operators of lower precedence than '>' (e.g. '|', '&', '?:') expand as
// intended.
// NOTE: this is a macro, so the selected argument is evaluated twice; do not
// pass expressions with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
62 :
63 : ///////////////////////////////////////////////////////////////////////////
64 :
65 :
66 22 : Hyphenator::Hyphenator() :
67 22 : aEvtListeners ( GetLinguMutex() )
68 : {
69 22 : bDisposing = false;
70 22 : pPropHelper = NULL;
71 22 : aDicts = NULL;
72 22 : numdict = 0;
73 22 : }
74 :
75 66 : Hyphenator::~Hyphenator()
76 : {
77 22 : if (numdict && aDicts)
78 : {
79 528 : for (int i=0; i < numdict; ++i)
80 : {
81 506 : delete aDicts[i].apCC;
82 506 : if (aDicts[i].aPtr)
83 7 : hnj_hyphen_free(aDicts[i].aPtr);
84 : }
85 : }
86 22 : delete[] aDicts;
87 :
88 22 : if (pPropHelper)
89 : {
90 0 : pPropHelper->RemoveAsPropListener();
91 0 : delete pPropHelper;
92 : }
93 44 : }
94 :
95 7 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
96 : {
97 7 : if (!pPropHelper)
98 : {
99 7 : Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
100 :
101 7 : pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
102 7 : pPropHelper->AddAsPropListener(); //! after a reference is established
103 : }
104 7 : return *pPropHelper;
105 : }
106 :
107 :
108 46 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
109 : throw(RuntimeException)
110 : {
111 46 : MutexGuard aGuard( GetLinguMutex() );
112 :
113 : // this routine should return the locales supported by the installed
114 : // dictionaries.
115 :
116 46 : if (!numdict)
117 : {
118 22 : SvtLinguConfig aLinguCfg;
119 :
120 : // get list of dictionaries-to-use
121 : // (or better speaking: the list of dictionaries using the
122 : // new configuration entries).
123 44 : std::list< SvtLinguConfigDictionaryEntry > aDics;
124 44 : uno::Sequence< OUString > aFormatList;
125 : aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators",
126 22 : "org.openoffice.lingu.LibHnjHyphenator", aFormatList );
127 22 : sal_Int32 nLen = aFormatList.getLength();
128 44 : for (sal_Int32 i = 0; i < nLen; ++i)
129 : {
130 : std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
131 22 : aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
132 22 : aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
133 22 : }
134 :
135 : //!! for compatibility with old dictionaries (the ones not using extensions
136 : //!! or new configuration entries, but still using the dictionary.lst file)
137 : //!! Get the list of old style spell checking dictionaries to use...
138 : std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
139 44 : GetOldStyleDics( "HYPH" ) );
140 :
141 : // to prefer dictionaries with configuration entries we will only
142 : // use those old style dictionaries that add a language that
143 : // is not yet supported by the list od new style dictionaries
144 22 : MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
145 :
146 22 : numdict = aDics.size();
147 22 : if (numdict)
148 : {
149 : // get supported locales from the dictionaries-to-use...
150 22 : sal_Int32 k = 0;
151 22 : std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
152 22 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
153 528 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
154 : {
155 506 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
156 506 : sal_Int32 nLen2 = aLocaleNames.getLength();
157 1012 : for (k = 0; k < nLen2; ++k)
158 : {
159 506 : aLocaleNamesSet.insert( aLocaleNames[k] );
160 : }
161 506 : }
162 : // ... and add them to the resulting sequence
163 22 : aSuppLocales.realloc( aLocaleNamesSet.size() );
164 22 : std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
165 22 : k = 0;
166 528 : for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
167 : {
168 506 : Locale aTmp( LanguageTag( *aItB ).getLocale());
169 506 : aSuppLocales[k++] = aTmp;
170 506 : }
171 :
172 : //! For each dictionary and each locale we need a separate entry.
173 : //! If this results in more than one dictionary per locale than (for now)
174 : //! it is undefined which dictionary gets used.
175 : //! In the future the implementation should support using several dictionaries
176 : //! for one locale.
177 22 : numdict = 0;
178 528 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
179 506 : numdict = numdict + aDictIt->aLocaleNames.getLength();
180 :
181 : // add dictionary information
182 22 : aDicts = new HDInfo[numdict];
183 :
184 22 : k = 0;
185 528 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
186 : {
187 1012 : if (aDictIt->aLocaleNames.getLength() > 0 &&
188 506 : aDictIt->aLocations.getLength() > 0)
189 : {
190 506 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
191 506 : sal_Int32 nLocales = aLocaleNames.getLength();
192 :
193 : // currently only one language per dictionary is supported in the actual implementation...
194 : // Thus here we work-around this by adding the same dictionary several times.
195 : // Once for each of it's supported locales.
196 1012 : for (sal_Int32 i = 0; i < nLocales; ++i)
197 : {
198 506 : LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
199 506 : aDicts[k].aPtr = NULL;
200 506 : aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
201 506 : aDicts[k].aLoc = aLanguageTag.getLocale();
202 506 : aDicts[k].apCC = new CharClass( aLanguageTag );
203 : // also both files have to be in the same directory and the
204 : // file names must only differ in the extension (.aff/.dic).
205 : // Thus we use the first location only and strip the extension part.
206 1012 : OUString aLocation = aDictIt->aLocations[0];
207 506 : sal_Int32 nPos = aLocation.lastIndexOf( '.' );
208 506 : aLocation = aLocation.copy( 0, nPos );
209 506 : aDicts[k].aName = aLocation;
210 :
211 506 : ++k;
212 1012 : }
213 : }
214 : }
215 22 : DBG_ASSERT( k == numdict, "index mismatch?" );
216 : }
217 : else
218 : {
219 : /* no dictionary found so register no dictionaries */
220 0 : numdict = 0;
221 0 : aDicts = NULL;
222 0 : aSuppLocales.realloc(0);
223 22 : }
224 : }
225 :
226 46 : return aSuppLocales;
227 : }
228 :
229 :
230 :
231 21899 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
232 : throw(RuntimeException)
233 : {
234 21899 : MutexGuard aGuard( GetLinguMutex() );
235 :
236 21899 : sal_Bool bRes = sal_False;
237 21899 : if (!aSuppLocales.getLength())
238 0 : getLocales();
239 :
240 21899 : const Locale *pLocale = aSuppLocales.getConstArray();
241 21899 : sal_Int32 nLen = aSuppLocales.getLength();
242 437980 : for (sal_Int32 i = 0; i < nLen; ++i)
243 : {
244 437980 : if (rLocale == pLocale[i])
245 : {
246 21899 : bRes = sal_True;
247 21899 : break;
248 : }
249 : }
250 21899 : return bRes;
251 : }
252 :
253 :
254 21892 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
255 : const ::com::sun::star::lang::Locale& aLocale,
256 : sal_Int16 nMaxLeading,
257 : const ::com::sun::star::beans::PropertyValues& aProperties )
258 : throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
259 : {
260 21892 : int nHyphenationPos = -1;
261 21892 : int nHyphenationPosAlt = -1;
262 21892 : int nHyphenationPosAltHyph = -1;
263 : int wordlen;
264 : char *hyphens;
265 : char *lcword;
266 21892 : int k = 0;
267 :
268 21892 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
269 21892 : rHelper.SetTmpPropVals(aProperties);
270 21892 : sal_Int16 minTrail = rHelper.GetMinTrailing();
271 21892 : sal_Int16 minLead = rHelper.GetMinLeading();
272 21892 : sal_Int16 minLen = rHelper.GetMinWordLength();
273 :
274 21892 : HyphenDict *dict = NULL;
275 21892 : rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
276 21892 : CharClass * pCC = NULL;
277 :
278 21892 : Reference< XHyphenatedWord > xRes;
279 :
280 21892 : k = -1;
281 525408 : for (int j = 0; j < numdict; j++)
282 : {
283 503516 : if (aLocale == aDicts[j].aLoc)
284 21892 : k = j;
285 : }
286 :
287 : // if we have a hyphenation dictionary matching this locale
288 21892 : if (k != -1)
289 : {
290 : // if this dictinary has not been loaded yet do that
291 21892 : if (!aDicts[k].aPtr)
292 : {
293 7 : OUString DictFN = aDicts[k].aName + ".dic";
294 14 : OUString dictpath;
295 :
296 7 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
297 14 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
298 :
299 : #if defined(WNT)
300 : // workaround for Windows specifc problem that the
301 : // path length in calls to 'fopen' is limted to somewhat
302 : // about 120+ characters which will usually be exceed when
303 : // using dictionaries as extensions.
304 : sTmp = Win_GetShortPathName( dictpath );
305 : #endif
306 :
307 7 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
308 : {
309 0 : fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
310 0 : return NULL;
311 : }
312 7 : aDicts[k].aPtr = dict;
313 14 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
314 : }
315 :
316 : // other wise hyphenate the word with that dictionary
317 21892 : dict = aDicts[k].aPtr;
318 21892 : eEnc = aDicts[k].eEnc;
319 21892 : pCC = aDicts[k].apCC;
320 :
321 : // we don't want to work with a default text encoding since following incorrect
322 : // results may occur only for specific text and thus may be hard to notice.
323 : // Thus better always make a clean exit here if the text encoding is in question.
324 : // Hopefully something not working at all will raise proper attention quickly. ;-)
325 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
326 21892 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
327 0 : return NULL;
328 :
329 21892 : sal_uInt16 ct = capitalType(aWord, pCC);
330 :
331 : // first convert any smart quotes or apostrophes to normal ones
332 21892 : OUStringBuffer rBuf(aWord);
333 21892 : sal_Int32 nc = rBuf.getLength();
334 : sal_Unicode ch;
335 119120 : for (sal_Int32 ix=0; ix < nc; ix++)
336 : {
337 97228 : ch = rBuf[ix];
338 97228 : if ((ch == 0x201C) || (ch == 0x201D))
339 0 : rBuf[ix] = (sal_Unicode)0x0022;
340 97228 : if ((ch == 0x2018) || (ch == 0x2019))
341 0 : rBuf[ix] = (sal_Unicode)0x0027;
342 : }
343 43784 : OUString nWord(rBuf.makeStringAndClear());
344 :
345 : // now convert word to all lowercase for pattern recognition
346 43784 : OUString nTerm(makeLowerCase(nWord, pCC));
347 :
348 : // now convert word to needed encoding
349 43784 : OString encWord(OU2ENC(nTerm,eEnc));
350 :
351 21892 : wordlen = encWord.getLength();
352 21892 : lcword = new char[wordlen + 1];
353 21892 : hyphens = new char[wordlen + 5];
354 :
355 21892 : char ** rep = NULL; // replacements of discretionary hyphenation
356 21892 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
357 21892 : int * cut = NULL; // length of deletions in original word
358 :
359 : // copy converted word into simple char buffer
360 21892 : strcpy(lcword,encWord.getStr());
361 :
362 : // now strip off any ending periods
363 21892 : int n = wordlen-1;
364 43784 : while((n >=0) && (lcword[n] == '.'))
365 0 : n--;
366 21892 : n++;
367 21892 : if (n > 0)
368 : {
369 : const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
370 : &rep, &pos, &cut, minLead, minTrail,
371 21892 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
372 43784 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
373 21892 : if (bFailed)
374 : {
375 : //whoops something did not work
376 0 : delete[] hyphens;
377 0 : delete[] lcword;
378 0 : if (rep)
379 : {
380 0 : for(int j = 0; j < n; j++)
381 : {
382 0 : if (rep[j]) free(rep[j]);
383 : }
384 0 : free(rep);
385 : }
386 0 : if (pos) free(pos);
387 0 : if (cut) free(cut);
388 0 : return NULL;
389 : }
390 : }
391 :
392 : // now backfill hyphens[] for any removed trailing periods
393 21892 : for (int c = n; c < wordlen; c++) hyphens[c] = '0';
394 21892 : hyphens[wordlen] = '\0';
395 :
396 21892 : sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
397 :
398 119120 : for (sal_Int32 i = 0; i < n; i++)
399 : {
400 97228 : int leftrep = 0;
401 97228 : sal_Bool hit = (n >= minLen);
402 97228 : if (!rep || !rep[i] || (i >= n))
403 : {
404 97228 : hit = hit && (hyphens[i]&1) && (i < Leading);
405 97228 : hit = hit && (i >= (minLead-1) );
406 97228 : hit = hit && ((n - i - 1) >= minTrail);
407 : }
408 : else
409 : {
410 : // calculate change character length before hyphenation point signed with '='
411 0 : for (char * c = rep[i]; *c && (*c != '='); c++)
412 : {
413 0 : if (eEnc == RTL_TEXTENCODING_UTF8)
414 : {
415 0 : if (((unsigned char) *c) >> 6 != 2)
416 0 : leftrep++;
417 : }
418 : else
419 0 : leftrep++;
420 : }
421 0 : hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
422 0 : hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
423 0 : hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
424 : }
425 97228 : if (hit)
426 : {
427 8 : nHyphenationPos = i;
428 8 : if (rep && (i < n) && rep[i])
429 : {
430 0 : nHyphenationPosAlt = i - pos[i];
431 0 : nHyphenationPosAltHyph = i + leftrep - pos[i];
432 : }
433 : }
434 : }
435 :
436 21892 : if (nHyphenationPos == -1)
437 : {
438 21884 : xRes = NULL;
439 : }
440 : else
441 : {
442 8 : if (rep && rep[nHyphenationPos])
443 : {
444 : // remove equal sign
445 0 : char * s = rep[nHyphenationPos];
446 0 : int eq = 0;
447 0 : for (; *s; s++)
448 : {
449 0 : if (*s == '=') eq = 1;
450 0 : if (eq) *s = *(s + 1);
451 : }
452 0 : OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
453 0 : OUString repHyph;
454 0 : switch (ct)
455 : {
456 : case CAPTYPE_ALLCAP:
457 : {
458 0 : repHyph = makeUpperCase(repHyphlow, pCC);
459 0 : break;
460 : }
461 : case CAPTYPE_INITCAP:
462 : {
463 0 : if (nHyphenationPosAlt == -1)
464 0 : repHyph = makeInitCap(repHyphlow, pCC);
465 : else
466 0 : repHyph = repHyphlow;
467 0 : break;
468 : }
469 : default:
470 : {
471 0 : repHyph = repHyphlow;
472 0 : break;
473 : }
474 : }
475 :
476 : // handle shortening
477 : sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
478 0 : nHyphenationPosAltHyph : nHyphenationPos);
479 : // dicretionary hyphenation
480 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
481 0 : aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
482 0 : (sal_Int16) nHyphenationPosAltHyph);
483 : }
484 : else
485 : {
486 16 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
487 8 : (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
488 : }
489 : }
490 :
491 21892 : delete[] lcword;
492 21892 : delete[] hyphens;
493 21892 : if (rep)
494 : {
495 0 : for(int j = 0; j < n; j++)
496 : {
497 0 : if (rep[j]) free(rep[j]);
498 : }
499 0 : free(rep);
500 : }
501 21892 : if (pos) free(pos);
502 21892 : if (cut) free(cut);
503 43784 : return xRes;
504 : }
505 0 : return NULL;
506 : }
507 :
508 :
509 0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
510 : const OUString& /*aWord*/,
511 : const ::com::sun::star::lang::Locale& /*aLocale*/,
512 : sal_Int16 /*nIndex*/,
513 : const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
514 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
515 : {
516 : /* alternative spelling isn't supported by tex dictionaries */
517 : /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
518 : /* TASK: implement queryAlternativeSpelling() */
519 0 : return NULL;
520 : }
521 :
522 0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
523 : const ::com::sun::star::lang::Locale& aLocale,
524 : const ::com::sun::star::beans::PropertyValues& aProperties )
525 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
526 : {
527 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
528 0 : rHelper.SetTmpPropVals(aProperties);
529 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
530 0 : sal_Int16 minLead = rHelper.GetMinLeading();
531 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
532 :
533 : //Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
534 : //well as "hyphenate"
535 0 : if (aWord.getLength() < minLen)
536 : {
537 0 : return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ),
538 0 : aWord, Sequence< sal_Int16 >() );
539 : }
540 :
541 0 : int k = -1;
542 0 : for (int j = 0; j < numdict; j++)
543 : {
544 0 : if (aLocale == aDicts[j].aLoc) k = j;
545 : }
546 :
547 : // if we have a hyphenation dictionary matching this locale
548 0 : if (k != -1)
549 : {
550 0 : HyphenDict *dict = NULL;
551 : // if this dictioanry has not been loaded yet do that
552 0 : if (!aDicts[k].aPtr)
553 : {
554 0 : OUString DictFN = aDicts[k].aName + ".dic";
555 0 : OUString dictpath;
556 :
557 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
558 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
559 :
560 : #if defined(WNT)
561 : // workaround for Windows specifc problem that the
562 : // path length in calls to 'fopen' is limted to somewhat
563 : // about 120+ characters which will usually be exceed when
564 : // using dictionaries as extensions.
565 : sTmp = Win_GetShortPathName( dictpath );
566 : #endif
567 :
568 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
569 : {
570 0 : fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
571 0 : return NULL;
572 : }
573 0 : aDicts[k].aPtr = dict;
574 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
575 : }
576 :
577 : // other wise hyphenate the word with that dictionary
578 0 : dict = aDicts[k].aPtr;
579 0 : rtl_TextEncoding eEnc = aDicts[k].eEnc;
580 0 : CharClass* pCC = aDicts[k].apCC;
581 :
582 : // we don't want to work with a default text encoding since following incorrect
583 : // results may occur only for specific text and thus may be hard to notice.
584 : // Thus better always make a clean exit here if the text encoding is in question.
585 : // Hopefully something not working at all will raise proper attention quickly. ;-)
586 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
587 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
588 0 : return NULL;
589 :
590 : // first handle smart quotes both single and double
591 0 : OUStringBuffer rBuf(aWord);
592 0 : sal_Int32 nc = rBuf.getLength();
593 : sal_Unicode ch;
594 0 : for (sal_Int32 ix=0; ix < nc; ix++)
595 : {
596 0 : ch = rBuf[ix];
597 0 : if ((ch == 0x201C) || (ch == 0x201D))
598 0 : rBuf[ix] = (sal_Unicode)0x0022;
599 0 : if ((ch == 0x2018) || (ch == 0x2019))
600 0 : rBuf[ix] = (sal_Unicode)0x0027;
601 : }
602 0 : OUString nWord(rBuf.makeStringAndClear());
603 :
604 : // now convert word to all lowercase for pattern recognition
605 0 : OUString nTerm(makeLowerCase(nWord, pCC));
606 :
607 : // now convert word to needed encoding
608 0 : OString encWord(OU2ENC(nTerm,eEnc));
609 :
610 0 : int wordlen = encWord.getLength();
611 0 : char *lcword = new char[wordlen+1];
612 0 : char *hyphens = new char[wordlen+5];
613 0 : char ** rep = NULL; // replacements of discretionary hyphenation
614 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
615 0 : int * cut = NULL; // length of deletions in original word
616 :
617 : // copy converted word into simple char buffer
618 0 : strcpy(lcword,encWord.getStr());
619 :
620 : // first remove any trailing periods
621 0 : int n = wordlen-1;
622 0 : while((n >=0) && (lcword[n] == '.'))
623 0 : n--;
624 0 : n++;
625 0 : if (n > 0)
626 : {
627 : const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
628 : &rep, &pos, &cut, minLead, minTrail,
629 0 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
630 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
631 0 : if (bFailed)
632 : {
633 0 : delete[] hyphens;
634 0 : delete[] lcword;
635 :
636 0 : if (rep)
637 : {
638 0 : for(int j = 0; j < n; j++)
639 : {
640 0 : if (rep[j]) free(rep[j]);
641 : }
642 0 : free(rep);
643 : }
644 0 : if (pos) free(pos);
645 0 : if (cut) free(cut);
646 :
647 0 : return NULL;
648 : }
649 : }
650 : // now backfill hyphens[] for any removed periods
651 0 : for (int c = n; c < wordlen; c++)
652 0 : hyphens[c] = '0';
653 0 : hyphens[wordlen] = '\0';
654 :
655 0 : sal_Int16 nHyphCount = 0;
656 : sal_Int16 i;
657 :
658 0 : for ( i = 0; i < encWord.getLength(); i++)
659 : {
660 0 : if (hyphens[i]&1 && (!rep || !rep[i]))
661 0 : nHyphCount++;
662 : }
663 :
664 0 : Sequence< sal_Int16 > aHyphPos(nHyphCount);
665 0 : sal_Int16 *pPos = aHyphPos.getArray();
666 0 : OUStringBuffer hyphenatedWordBuffer;
667 0 : nHyphCount = 0;
668 :
669 0 : for (i = 0; i < nWord.getLength(); i++)
670 : {
671 0 : hyphenatedWordBuffer.append(aWord[i]);
672 : // hyphenation position (not alternative)
673 0 : if (hyphens[i]&1 && (!rep || !rep[i]))
674 : {
675 0 : pPos[nHyphCount] = i;
676 0 : hyphenatedWordBuffer.append(sal_Unicode('='));
677 0 : nHyphCount++;
678 : }
679 : }
680 :
681 0 : OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
682 :
683 : Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
684 0 : aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
685 :
686 0 : delete[] hyphens;
687 0 : delete[] lcword;
688 :
689 0 : if (rep)
690 : {
691 0 : for(int j = 0; j < n; j++)
692 : {
693 0 : if (rep[j]) free(rep[j]);
694 : }
695 0 : free(rep);
696 : }
697 0 : if (pos) free(pos);
698 0 : if (cut) free(cut);
699 :
700 0 : return xRes;
701 : }
702 :
703 0 : return NULL;
704 : }
705 :
706 21892 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
707 : {
708 21892 : if (pCC)
709 21892 : return pCC->lowercase(aTerm);
710 0 : return aTerm;
711 : }
712 :
713 0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
714 : {
715 0 : if (pCC)
716 0 : return pCC->uppercase(aTerm);
717 0 : return aTerm;
718 : }
719 :
720 :
721 0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
722 : {
723 0 : sal_Int32 tlen = aTerm.getLength();
724 0 : if ((pCC) && (tlen))
725 : {
726 0 : OUString bTemp = aTerm.copy(0,1);
727 0 : if (tlen > 1)
728 0 : return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
729 :
730 0 : return pCC->uppercase(bTemp, 0, 1);
731 : }
732 0 : return aTerm;
733 : }
734 :
735 :
736 22 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
737 : const Reference< XMultiServiceFactory > & /*rSMgr*/ )
738 : throw(Exception)
739 : {
740 22 : Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
741 22 : return xService;
742 : }
743 :
744 :
745 7 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
746 : const Reference< XLinguServiceEventListener >& rxLstnr )
747 : throw(RuntimeException)
748 : {
749 7 : MutexGuard aGuard( GetLinguMutex() );
750 :
751 7 : sal_Bool bRes = sal_False;
752 7 : if (!bDisposing && rxLstnr.is())
753 : {
754 7 : bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
755 : }
756 7 : return bRes;
757 : }
758 :
759 :
760 7 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
761 : const Reference< XLinguServiceEventListener >& rxLstnr )
762 : throw(RuntimeException)
763 : {
764 7 : MutexGuard aGuard( GetLinguMutex() );
765 :
766 7 : sal_Bool bRes = sal_False;
767 7 : if (!bDisposing && rxLstnr.is())
768 : {
769 0 : bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
770 : }
771 7 : return bRes;
772 : }
773 :
774 :
775 0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
776 : throw(RuntimeException)
777 : {
778 0 : MutexGuard aGuard( GetLinguMutex() );
779 0 : return OUString( "Libhyphen Hyphenator" );
780 : }
781 :
782 :
783 0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
784 : throw(Exception, RuntimeException)
785 : {
786 0 : MutexGuard aGuard( GetLinguMutex() );
787 :
788 0 : if (!pPropHelper)
789 : {
790 0 : sal_Int32 nLen = rArguments.getLength();
791 0 : if (2 == nLen)
792 : {
793 0 : Reference< XLinguProperties > xPropSet;
794 0 : rArguments.getConstArray()[0] >>= xPropSet;
795 : //rArguments.getConstArray()[1] >>= xDicList;
796 :
797 : //! Pointer allows for access of the non-UNO functions.
798 : //! And the reference to the UNO-functions while increasing
799 : //! the ref-count and will implicitly free the memory
800 : //! when the object is not longer used.
801 0 : pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
802 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
803 : }
804 : else {
805 : OSL_FAIL( "wrong number of arguments in sequence" );
806 : }
807 0 : }
808 0 : }
809 :
810 :
811 22 : void SAL_CALL Hyphenator::dispose()
812 : throw(RuntimeException)
813 : {
814 22 : MutexGuard aGuard( GetLinguMutex() );
815 :
816 22 : if (!bDisposing)
817 : {
818 22 : bDisposing = true;
819 22 : EventObject aEvtObj( (XHyphenator *) this );
820 22 : aEvtListeners.disposeAndClear( aEvtObj );
821 22 : if (pPropHelper)
822 : {
823 7 : pPropHelper->RemoveAsPropListener();
824 7 : delete pPropHelper;
825 7 : pPropHelper = NULL;
826 22 : }
827 22 : }
828 22 : }
829 :
830 :
831 0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
832 : throw(RuntimeException)
833 : {
834 0 : MutexGuard aGuard( GetLinguMutex() );
835 :
836 0 : if (!bDisposing && rxListener.is())
837 0 : aEvtListeners.addInterface( rxListener );
838 0 : }
839 :
840 :
841 0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
842 : throw(RuntimeException)
843 : {
844 0 : MutexGuard aGuard( GetLinguMutex() );
845 :
846 0 : if (!bDisposing && rxListener.is())
847 0 : aEvtListeners.removeInterface( rxListener );
848 0 : }
849 :
850 :
851 : ///////////////////////////////////////////////////////////////////////////
852 : // Service specific part
853 : //
854 :
855 23 : OUString SAL_CALL Hyphenator::getImplementationName()
856 : throw(RuntimeException)
857 : {
858 23 : MutexGuard aGuard( GetLinguMutex() );
859 :
860 23 : return getImplementationName_Static();
861 : }
862 :
863 :
864 0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
865 : throw(RuntimeException)
866 : {
867 0 : MutexGuard aGuard( GetLinguMutex() );
868 :
869 0 : Sequence< OUString > aSNL = getSupportedServiceNames();
870 0 : const OUString * pArray = aSNL.getConstArray();
871 0 : for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
872 0 : if( pArray[i] == ServiceName )
873 0 : return sal_True;
874 0 : return sal_False;
875 : }
876 :
877 :
878 0 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
879 : throw(RuntimeException)
880 : {
881 0 : MutexGuard aGuard( GetLinguMutex() );
882 :
883 0 : return getSupportedServiceNames_Static();
884 : }
885 :
886 :
887 22 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
888 : throw()
889 : {
890 22 : MutexGuard aGuard( GetLinguMutex() );
891 :
892 22 : Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich
893 22 : aSNS.getArray()[0] = SN_HYPHENATOR;
894 22 : return aSNS;
895 : }
896 :
897 22 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
898 : XMultiServiceFactory * pServiceManager, void * )
899 : {
900 22 : void * pRet = 0;
901 22 : if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
902 : {
903 : Reference< XSingleServiceFactory > xFactory =
904 : cppu::createOneInstanceFactory(
905 : pServiceManager,
906 : Hyphenator::getImplementationName_Static(),
907 : Hyphenator_CreateInstance,
908 22 : Hyphenator::getSupportedServiceNames_Static());
909 : // acquire, because we return an interface pointer instead of a reference
910 22 : xFactory->acquire();
911 22 : pRet = xFactory.get();
912 : }
913 22 : return pRet;
914 : }
915 :
916 :
917 : ///////////////////////////////////////////////////////////////////////////
918 :
919 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|