Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/uno/Reference.h>
21 : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
22 :
23 : #include <cppuhelper/factory.hxx>
24 : #include <cppuhelper/supportsservice.hxx>
25 : #include <com/sun/star/registry/XRegistryKey.hpp>
26 : #include <i18nlangtag/languagetag.hxx>
27 : #include <tools/debug.hxx>
28 : #include <osl/mutex.hxx>
29 :
30 : #include <hyphen.h>
31 : #include <hyphenimp.hxx>
32 :
33 : #include <linguistic/hyphdta.hxx>
34 : #include <rtl/ustring.hxx>
35 : #include <rtl/ustrbuf.hxx>
36 : #include <rtl/textenc.h>
37 :
38 : #include <linguistic/lngprops.hxx>
39 : #include <linguistic/misc.hxx>
40 : #include <unotools/pathoptions.hxx>
41 : #include <unotools/useroptions.hxx>
42 : #include <unotools/lingucfg.hxx>
43 : #include <osl/file.hxx>
44 :
45 : #include <stdio.h>
46 : #include <string.h>
47 :
48 : #include <list>
49 : #include <set>
50 :
51 : using namespace utl;
52 : using namespace osl;
53 : using namespace com::sun::star;
54 : using namespace com::sun::star::beans;
55 : using namespace com::sun::star::lang;
56 : using namespace com::sun::star::uno;
57 : using namespace com::sun::star::linguistic2;
58 : using namespace linguistic;
59 :
// Maximum of two values.
// Both arguments and the whole expansion are parenthesized so that operands
// containing operators of lower precedence than '>' or '?:' (e.g. '|', '&',
// a nested conditional) still expand to the intended comparison.
// NOTE: this is a macro, so each argument may be evaluated twice -- do not
// pass expressions with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
62 :
63 0 : Hyphenator::Hyphenator() :
64 0 : aEvtListeners ( GetLinguMutex() )
65 : {
66 0 : bDisposing = false;
67 0 : pPropHelper = NULL;
68 0 : aDicts = NULL;
69 0 : numdict = 0;
70 0 : }
71 :
72 0 : Hyphenator::~Hyphenator()
73 : {
74 0 : if (numdict && aDicts)
75 : {
76 0 : for (int i=0; i < numdict; ++i)
77 : {
78 0 : delete aDicts[i].apCC;
79 0 : if (aDicts[i].aPtr)
80 0 : hnj_hyphen_free(aDicts[i].aPtr);
81 : }
82 : }
83 0 : delete[] aDicts;
84 :
85 0 : if (pPropHelper)
86 : {
87 0 : pPropHelper->RemoveAsPropListener();
88 0 : delete pPropHelper;
89 : }
90 0 : }
91 :
92 0 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
93 : {
94 0 : if (!pPropHelper)
95 : {
96 0 : Reference< XLinguProperties > xPropSet( GetLinguProperties(), UNO_QUERY );
97 :
98 0 : pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
99 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
100 : }
101 0 : return *pPropHelper;
102 : }
103 :
104 0 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
105 : throw(RuntimeException, std::exception)
106 : {
107 0 : MutexGuard aGuard( GetLinguMutex() );
108 :
109 : // this routine should return the locales supported by the installed
110 : // dictionaries.
111 0 : if (!numdict)
112 : {
113 0 : SvtLinguConfig aLinguCfg;
114 :
115 : // get list of dictionaries-to-use
116 : // (or better speaking: the list of dictionaries using the
117 : // new configuration entries).
118 0 : std::list< SvtLinguConfigDictionaryEntry > aDics;
119 0 : uno::Sequence< OUString > aFormatList;
120 : aLinguCfg.GetSupportedDictionaryFormatsFor( "Hyphenators",
121 0 : "org.openoffice.lingu.LibHnjHyphenator", aFormatList );
122 0 : sal_Int32 nLen = aFormatList.getLength();
123 0 : for (sal_Int32 i = 0; i < nLen; ++i)
124 : {
125 : std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
126 0 : aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
127 0 : aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
128 0 : }
129 :
130 : //!! for compatibility with old dictionaries (the ones not using extensions
131 : //!! or new configuration entries, but still using the dictionary.lst file)
132 : //!! Get the list of old style spell checking dictionaries to use...
133 : std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
134 0 : GetOldStyleDics( "HYPH" ) );
135 :
136 : // to prefer dictionaries with configuration entries we will only
137 : // use those old style dictionaries that add a language that
138 : // is not yet supported by the list od new style dictionaries
139 0 : MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
140 :
141 0 : numdict = aDics.size();
142 0 : if (numdict)
143 : {
144 : // get supported locales from the dictionaries-to-use...
145 0 : sal_Int32 k = 0;
146 0 : std::set< OUString, lt_rtl_OUString > aLocaleNamesSet;
147 0 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
148 0 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
149 : {
150 0 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
151 0 : sal_Int32 nLen2 = aLocaleNames.getLength();
152 0 : for (k = 0; k < nLen2; ++k)
153 : {
154 0 : aLocaleNamesSet.insert( aLocaleNames[k] );
155 : }
156 0 : }
157 : // ... and add them to the resulting sequence
158 0 : aSuppLocales.realloc( aLocaleNamesSet.size() );
159 0 : std::set< OUString, lt_rtl_OUString >::const_iterator aItB;
160 0 : k = 0;
161 0 : for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
162 : {
163 0 : Locale aTmp( LanguageTag::convertToLocale( *aItB ));
164 0 : aSuppLocales[k++] = aTmp;
165 0 : }
166 :
167 : //! For each dictionary and each locale we need a separate entry.
168 : //! If this results in more than one dictionary per locale than (for now)
169 : //! it is undefined which dictionary gets used.
170 : //! In the future the implementation should support using several dictionaries
171 : //! for one locale.
172 0 : numdict = 0;
173 0 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
174 0 : numdict = numdict + aDictIt->aLocaleNames.getLength();
175 :
176 : // add dictionary information
177 0 : aDicts = new HDInfo[numdict];
178 :
179 0 : k = 0;
180 0 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
181 : {
182 0 : if (aDictIt->aLocaleNames.getLength() > 0 &&
183 0 : aDictIt->aLocations.getLength() > 0)
184 : {
185 0 : uno::Sequence< OUString > aLocaleNames( aDictIt->aLocaleNames );
186 0 : sal_Int32 nLocales = aLocaleNames.getLength();
187 :
188 : // currently only one language per dictionary is supported in the actual implementation...
189 : // Thus here we work-around this by adding the same dictionary several times.
190 : // Once for each of it's supported locales.
191 0 : for (sal_Int32 i = 0; i < nLocales; ++i)
192 : {
193 0 : LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
194 0 : aDicts[k].aPtr = NULL;
195 0 : aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
196 0 : aDicts[k].aLoc = aLanguageTag.getLocale();
197 0 : aDicts[k].apCC = new CharClass( aLanguageTag );
198 : // also both files have to be in the same directory and the
199 : // file names must only differ in the extension (.aff/.dic).
200 : // Thus we use the first location only and strip the extension part.
201 0 : OUString aLocation = aDictIt->aLocations[0];
202 0 : sal_Int32 nPos = aLocation.lastIndexOf( '.' );
203 0 : aLocation = aLocation.copy( 0, nPos );
204 0 : aDicts[k].aName = aLocation;
205 :
206 0 : ++k;
207 0 : }
208 : }
209 : }
210 0 : DBG_ASSERT( k == numdict, "index mismatch?" );
211 : }
212 : else
213 : {
214 : // no dictionary found so register no dictionaries
215 0 : numdict = 0;
216 0 : aDicts = NULL;
217 0 : aSuppLocales.realloc(0);
218 0 : }
219 : }
220 :
221 0 : return aSuppLocales;
222 : }
223 :
224 0 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
225 : throw(RuntimeException, std::exception)
226 : {
227 0 : MutexGuard aGuard( GetLinguMutex() );
228 :
229 0 : sal_Bool bRes = sal_False;
230 0 : if (!aSuppLocales.getLength())
231 0 : getLocales();
232 :
233 0 : const Locale *pLocale = aSuppLocales.getConstArray();
234 0 : sal_Int32 nLen = aSuppLocales.getLength();
235 0 : for (sal_Int32 i = 0; i < nLen; ++i)
236 : {
237 0 : if (rLocale == pLocale[i])
238 : {
239 0 : bRes = sal_True;
240 0 : break;
241 : }
242 : }
243 0 : return bRes;
244 : }
245 :
246 0 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const OUString& aWord,
247 : const ::com::sun::star::lang::Locale& aLocale,
248 : sal_Int16 nMaxLeading,
249 : const ::com::sun::star::beans::PropertyValues& aProperties )
250 : throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException, std::exception)
251 : {
252 0 : int nHyphenationPos = -1;
253 0 : int nHyphenationPosAlt = -1;
254 0 : int nHyphenationPosAltHyph = -1;
255 : int wordlen;
256 : char *hyphens;
257 : char *lcword;
258 0 : int k = 0;
259 :
260 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
261 0 : rHelper.SetTmpPropVals(aProperties);
262 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
263 0 : sal_Int16 minLead = rHelper.GetMinLeading();
264 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
265 :
266 0 : HyphenDict *dict = NULL;
267 0 : rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
268 0 : CharClass * pCC = NULL;
269 :
270 0 : Reference< XHyphenatedWord > xRes;
271 :
272 0 : k = -1;
273 0 : for (int j = 0; j < numdict; j++)
274 : {
275 0 : if (aLocale == aDicts[j].aLoc)
276 0 : k = j;
277 : }
278 :
279 : // if we have a hyphenation dictionary matching this locale
280 0 : if (k != -1)
281 : {
282 : // if this dictinary has not been loaded yet do that
283 0 : if (!aDicts[k].aPtr)
284 : {
285 0 : OUString DictFN = aDicts[k].aName + ".dic";
286 0 : OUString dictpath;
287 :
288 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
289 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
290 :
291 : #if defined(WNT)
292 : // workaround for Windows specific problem that the
293 : // path length in calls to 'fopen' is limted to somewhat
294 : // about 120+ characters which will usually be exceed when
295 : // using dictionaries as extensions.
296 : sTmp = Win_GetShortPathName( dictpath );
297 : #endif
298 :
299 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
300 : {
301 0 : fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
302 0 : return NULL;
303 : }
304 0 : aDicts[k].aPtr = dict;
305 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
306 : }
307 :
308 : // other wise hyphenate the word with that dictionary
309 0 : dict = aDicts[k].aPtr;
310 0 : eEnc = aDicts[k].eEnc;
311 0 : pCC = aDicts[k].apCC;
312 :
313 : // we don't want to work with a default text encoding since following incorrect
314 : // results may occur only for specific text and thus may be hard to notice.
315 : // Thus better always make a clean exit here if the text encoding is in question.
316 : // Hopefully something not working at all will raise proper attention quickly. ;-)
317 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
318 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
319 0 : return NULL;
320 :
321 0 : sal_uInt16 ct = capitalType(aWord, pCC);
322 :
323 : // first convert any smart quotes or apostrophes to normal ones
324 0 : OUStringBuffer rBuf(aWord);
325 0 : sal_Int32 nc = rBuf.getLength();
326 : sal_Unicode ch;
327 0 : for (sal_Int32 ix=0; ix < nc; ix++)
328 : {
329 0 : ch = rBuf[ix];
330 0 : if ((ch == 0x201C) || (ch == 0x201D))
331 0 : rBuf[ix] = (sal_Unicode)0x0022;
332 0 : if ((ch == 0x2018) || (ch == 0x2019))
333 0 : rBuf[ix] = (sal_Unicode)0x0027;
334 : }
335 0 : OUString nWord(rBuf.makeStringAndClear());
336 :
337 : // now convert word to all lowercase for pattern recognition
338 0 : OUString nTerm(makeLowerCase(nWord, pCC));
339 :
340 : // now convert word to needed encoding
341 0 : OString encWord(OU2ENC(nTerm,eEnc));
342 :
343 0 : wordlen = encWord.getLength();
344 0 : lcword = new char[wordlen + 1];
345 0 : hyphens = new char[wordlen + 5];
346 :
347 0 : char ** rep = NULL; // replacements of discretionary hyphenation
348 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
349 0 : int * cut = NULL; // length of deletions in original word
350 :
351 : // copy converted word into simple char buffer
352 0 : strcpy(lcword,encWord.getStr());
353 :
354 : // now strip off any ending periods
355 0 : int n = wordlen-1;
356 0 : while((n >=0) && (lcword[n] == '.'))
357 0 : n--;
358 0 : n++;
359 0 : if (n > 0)
360 : {
361 : const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
362 : &rep, &pos, &cut, minLead, minTrail,
363 0 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
364 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
365 0 : if (bFailed)
366 : {
367 : // whoops something did not work
368 0 : delete[] hyphens;
369 0 : delete[] lcword;
370 0 : if (rep)
371 : {
372 0 : for(int j = 0; j < n; j++)
373 : {
374 0 : if (rep[j]) free(rep[j]);
375 : }
376 0 : free(rep);
377 : }
378 0 : if (pos) free(pos);
379 0 : if (cut) free(cut);
380 0 : return NULL;
381 : }
382 : }
383 :
384 : // now backfill hyphens[] for any removed trailing periods
385 0 : for (int c = n; c < wordlen; c++) hyphens[c] = '0';
386 0 : hyphens[wordlen] = '\0';
387 :
388 0 : sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
389 :
390 0 : for (sal_Int32 i = 0; i < n; i++)
391 : {
392 0 : int leftrep = 0;
393 0 : sal_Bool hit = (n >= minLen);
394 0 : if (!rep || !rep[i] || (i >= n))
395 : {
396 0 : hit = hit && (hyphens[i]&1) && (i < Leading);
397 0 : hit = hit && (i >= (minLead-1) );
398 0 : hit = hit && ((n - i - 1) >= minTrail);
399 : }
400 : else
401 : {
402 : // calculate change character length before hyphenation point signed with '='
403 0 : for (char * c = rep[i]; *c && (*c != '='); c++)
404 : {
405 0 : if (eEnc == RTL_TEXTENCODING_UTF8)
406 : {
407 0 : if (((unsigned char) *c) >> 6 != 2)
408 0 : leftrep++;
409 : }
410 : else
411 0 : leftrep++;
412 : }
413 0 : hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
414 0 : hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
415 0 : hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
416 : }
417 0 : if (hit)
418 : {
419 0 : nHyphenationPos = i;
420 0 : if (rep && (i < n) && rep[i])
421 : {
422 0 : nHyphenationPosAlt = i - pos[i];
423 0 : nHyphenationPosAltHyph = i + leftrep - pos[i];
424 : }
425 : }
426 : }
427 :
428 0 : if (nHyphenationPos == -1)
429 : {
430 0 : xRes = NULL;
431 : }
432 : else
433 : {
434 0 : if (rep && rep[nHyphenationPos])
435 : {
436 : // remove equal sign
437 0 : char * s = rep[nHyphenationPos];
438 0 : int eq = 0;
439 0 : for (; *s; s++)
440 : {
441 0 : if (*s == '=') eq = 1;
442 0 : if (eq) *s = *(s + 1);
443 : }
444 0 : OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
445 0 : OUString repHyph;
446 0 : switch (ct)
447 : {
448 : case CAPTYPE_ALLCAP:
449 : {
450 0 : repHyph = makeUpperCase(repHyphlow, pCC);
451 0 : break;
452 : }
453 : case CAPTYPE_INITCAP:
454 : {
455 0 : if (nHyphenationPosAlt == -1)
456 0 : repHyph = makeInitCap(repHyphlow, pCC);
457 : else
458 0 : repHyph = repHyphlow;
459 0 : break;
460 : }
461 : default:
462 : {
463 0 : repHyph = repHyphlow;
464 0 : break;
465 : }
466 : }
467 :
468 : // handle shortening
469 : sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
470 0 : nHyphenationPosAltHyph : nHyphenationPos);
471 : // dicretionary hyphenation
472 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ), nPos,
473 0 : aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
474 0 : (sal_Int16) nHyphenationPosAltHyph);
475 : }
476 : else
477 : {
478 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LinguLocaleToLanguage( aLocale ),
479 0 : (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
480 : }
481 : }
482 :
483 0 : delete[] lcword;
484 0 : delete[] hyphens;
485 0 : if (rep)
486 : {
487 0 : for(int j = 0; j < n; j++)
488 : {
489 0 : if (rep[j]) free(rep[j]);
490 : }
491 0 : free(rep);
492 : }
493 0 : if (pos) free(pos);
494 0 : if (cut) free(cut);
495 0 : return xRes;
496 : }
497 0 : return NULL;
498 : }
499 :
500 0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
501 : const OUString& aWord,
502 : const ::com::sun::star::lang::Locale& aLocale,
503 : sal_Int16 nIndex,
504 : const ::com::sun::star::beans::PropertyValues& aProperties )
505 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
506 : {
507 : // Firstly we allow only one plus character before the hyphen to avoid to miss the right break point:
508 0 : for (int extrachar = 1; extrachar <= 2; extrachar++)
509 : {
510 0 : Reference< XHyphenatedWord > xRes = hyphenate(aWord, aLocale, nIndex + 1 + extrachar, aProperties);
511 0 : if (xRes.is() && xRes->isAlternativeSpelling() && xRes->getHyphenationPos() == nIndex)
512 0 : return xRes;
513 0 : }
514 0 : return NULL;
515 : }
516 :
517 0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const OUString& aWord,
518 : const ::com::sun::star::lang::Locale& aLocale,
519 : const ::com::sun::star::beans::PropertyValues& aProperties )
520 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException, std::exception)
521 : {
522 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
523 0 : rHelper.SetTmpPropVals(aProperties);
524 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
525 0 : sal_Int16 minLead = rHelper.GetMinLeading();
526 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
527 :
528 : // Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
529 : // well as "hyphenate"
530 0 : if (aWord.getLength() < minLen)
531 : {
532 0 : return PossibleHyphens::CreatePossibleHyphens( aWord, LinguLocaleToLanguage( aLocale ),
533 0 : aWord, Sequence< sal_Int16 >() );
534 : }
535 :
536 0 : int k = -1;
537 0 : for (int j = 0; j < numdict; j++)
538 : {
539 0 : if (aLocale == aDicts[j].aLoc) k = j;
540 : }
541 :
542 : // if we have a hyphenation dictionary matching this locale
543 0 : if (k != -1)
544 : {
545 0 : HyphenDict *dict = NULL;
546 : // if this dictioanry has not been loaded yet do that
547 0 : if (!aDicts[k].aPtr)
548 : {
549 0 : OUString DictFN = aDicts[k].aName + ".dic";
550 0 : OUString dictpath;
551 :
552 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
553 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
554 :
555 : #if defined(WNT)
556 : // workaround for Windows specific problem that the
557 : // path length in calls to 'fopen' is limted to somewhat
558 : // about 120+ characters which will usually be exceed when
559 : // using dictionaries as extensions.
560 : sTmp = Win_GetShortPathName( dictpath );
561 : #endif
562 :
563 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
564 : {
565 0 : fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
566 0 : return NULL;
567 : }
568 0 : aDicts[k].aPtr = dict;
569 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
570 : }
571 :
572 : // other wise hyphenate the word with that dictionary
573 0 : dict = aDicts[k].aPtr;
574 0 : rtl_TextEncoding eEnc = aDicts[k].eEnc;
575 0 : CharClass* pCC = aDicts[k].apCC;
576 :
577 : // we don't want to work with a default text encoding since following incorrect
578 : // results may occur only for specific text and thus may be hard to notice.
579 : // Thus better always make a clean exit here if the text encoding is in question.
580 : // Hopefully something not working at all will raise proper attention quickly. ;-)
581 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
582 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
583 0 : return NULL;
584 :
585 : // first handle smart quotes both single and double
586 0 : OUStringBuffer rBuf(aWord);
587 0 : sal_Int32 nc = rBuf.getLength();
588 : sal_Unicode ch;
589 0 : for (sal_Int32 ix=0; ix < nc; ix++)
590 : {
591 0 : ch = rBuf[ix];
592 0 : if ((ch == 0x201C) || (ch == 0x201D))
593 0 : rBuf[ix] = (sal_Unicode)0x0022;
594 0 : if ((ch == 0x2018) || (ch == 0x2019))
595 0 : rBuf[ix] = (sal_Unicode)0x0027;
596 : }
597 0 : OUString nWord(rBuf.makeStringAndClear());
598 :
599 : // now convert word to all lowercase for pattern recognition
600 0 : OUString nTerm(makeLowerCase(nWord, pCC));
601 :
602 : // now convert word to needed encoding
603 0 : OString encWord(OU2ENC(nTerm,eEnc));
604 :
605 0 : int wordlen = encWord.getLength();
606 0 : char *lcword = new char[wordlen+1];
607 0 : char *hyphens = new char[wordlen+5];
608 0 : char ** rep = NULL; // replacements of discretionary hyphenation
609 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
610 0 : int * cut = NULL; // length of deletions in original word
611 :
612 : // copy converted word into simple char buffer
613 0 : strcpy(lcword,encWord.getStr());
614 :
615 : // first remove any trailing periods
616 0 : int n = wordlen-1;
617 0 : while((n >=0) && (lcword[n] == '.'))
618 0 : n--;
619 0 : n++;
620 0 : if (n > 0)
621 : {
622 : const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
623 : &rep, &pos, &cut, minLead, minTrail,
624 0 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
625 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
626 0 : if (bFailed)
627 : {
628 0 : delete[] hyphens;
629 0 : delete[] lcword;
630 :
631 0 : if (rep)
632 : {
633 0 : for(int j = 0; j < n; j++)
634 : {
635 0 : if (rep[j]) free(rep[j]);
636 : }
637 0 : free(rep);
638 : }
639 0 : if (pos) free(pos);
640 0 : if (cut) free(cut);
641 :
642 0 : return NULL;
643 : }
644 : }
645 : // now backfill hyphens[] for any removed periods
646 0 : for (int c = n; c < wordlen; c++)
647 0 : hyphens[c] = '0';
648 0 : hyphens[wordlen] = '\0';
649 :
650 0 : sal_Int16 nHyphCount = 0;
651 : sal_Int16 i;
652 :
653 0 : for ( i = 0; i < encWord.getLength(); i++)
654 : {
655 0 : if (hyphens[i]&1)
656 0 : nHyphCount++;
657 : }
658 :
659 0 : Sequence< sal_Int16 > aHyphPos(nHyphCount);
660 0 : sal_Int16 *pPos = aHyphPos.getArray();
661 0 : OUStringBuffer hyphenatedWordBuffer;
662 0 : nHyphCount = 0;
663 :
664 0 : for (i = 0; i < nWord.getLength(); i++)
665 : {
666 0 : hyphenatedWordBuffer.append(aWord[i]);
667 : // hyphenation position
668 0 : if (hyphens[i]&1)
669 : {
670 0 : pPos[nHyphCount] = i;
671 0 : hyphenatedWordBuffer.append('=');
672 0 : nHyphCount++;
673 : }
674 : }
675 :
676 0 : OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
677 :
678 : Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
679 0 : aWord, LinguLocaleToLanguage( aLocale ), hyphenatedWord, aHyphPos);
680 :
681 0 : delete[] hyphens;
682 0 : delete[] lcword;
683 :
684 0 : if (rep)
685 : {
686 0 : for(int j = 0; j < n; j++)
687 : {
688 0 : if (rep[j]) free(rep[j]);
689 : }
690 0 : free(rep);
691 : }
692 0 : if (pos) free(pos);
693 0 : if (cut) free(cut);
694 :
695 0 : return xRes;
696 : }
697 :
698 0 : return NULL;
699 : }
700 :
701 0 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
702 : {
703 0 : if (pCC)
704 0 : return pCC->lowercase(aTerm);
705 0 : return aTerm;
706 : }
707 :
708 0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
709 : {
710 0 : if (pCC)
711 0 : return pCC->uppercase(aTerm);
712 0 : return aTerm;
713 : }
714 :
715 0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
716 : {
717 0 : sal_Int32 tlen = aTerm.getLength();
718 0 : if ((pCC) && (tlen))
719 : {
720 0 : OUString bTemp = aTerm.copy(0,1);
721 0 : if (tlen > 1)
722 0 : return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
723 :
724 0 : return pCC->uppercase(bTemp, 0, 1);
725 : }
726 0 : return aTerm;
727 : }
728 :
729 0 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
730 : const Reference< XMultiServiceFactory > & /*rSMgr*/ )
731 : throw(Exception)
732 : {
733 0 : Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
734 0 : return xService;
735 : }
736 :
737 0 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
738 : const Reference< XLinguServiceEventListener >& rxLstnr )
739 : throw(RuntimeException, std::exception)
740 : {
741 0 : MutexGuard aGuard( GetLinguMutex() );
742 :
743 0 : sal_Bool bRes = sal_False;
744 0 : if (!bDisposing && rxLstnr.is())
745 : {
746 0 : bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
747 : }
748 0 : return bRes;
749 : }
750 :
751 0 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
752 : const Reference< XLinguServiceEventListener >& rxLstnr )
753 : throw(RuntimeException, std::exception)
754 : {
755 0 : MutexGuard aGuard( GetLinguMutex() );
756 :
757 0 : sal_Bool bRes = sal_False;
758 0 : if (!bDisposing && rxLstnr.is())
759 : {
760 0 : bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
761 : }
762 0 : return bRes;
763 : }
764 :
765 0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
766 : throw(RuntimeException, std::exception)
767 : {
768 0 : MutexGuard aGuard( GetLinguMutex() );
769 0 : return OUString( "Libhyphen Hyphenator" );
770 : }
771 :
772 0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
773 : throw(Exception, RuntimeException, std::exception)
774 : {
775 0 : MutexGuard aGuard( GetLinguMutex() );
776 :
777 0 : if (!pPropHelper)
778 : {
779 0 : sal_Int32 nLen = rArguments.getLength();
780 0 : if (2 == nLen)
781 : {
782 0 : Reference< XLinguProperties > xPropSet;
783 0 : rArguments.getConstArray()[0] >>= xPropSet;
784 : // rArguments.getConstArray()[1] >>= xDicList;
785 :
786 : //! Pointer allows for access of the non-UNO functions.
787 : //! And the reference to the UNO-functions while increasing
788 : //! the ref-count and will implicitly free the memory
789 : //! when the object is not longer used.
790 0 : pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
791 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
792 : }
793 : else {
794 : OSL_FAIL( "wrong number of arguments in sequence" );
795 : }
796 0 : }
797 0 : }
798 :
799 0 : void SAL_CALL Hyphenator::dispose()
800 : throw(RuntimeException, std::exception)
801 : {
802 0 : MutexGuard aGuard( GetLinguMutex() );
803 :
804 0 : if (!bDisposing)
805 : {
806 0 : bDisposing = true;
807 0 : EventObject aEvtObj( (XHyphenator *) this );
808 0 : aEvtListeners.disposeAndClear( aEvtObj );
809 0 : if (pPropHelper)
810 : {
811 0 : pPropHelper->RemoveAsPropListener();
812 0 : delete pPropHelper;
813 0 : pPropHelper = NULL;
814 0 : }
815 0 : }
816 0 : }
817 :
818 0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
819 : throw(RuntimeException, std::exception)
820 : {
821 0 : MutexGuard aGuard( GetLinguMutex() );
822 :
823 0 : if (!bDisposing && rxListener.is())
824 0 : aEvtListeners.addInterface( rxListener );
825 0 : }
826 :
827 0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
828 : throw(RuntimeException, std::exception)
829 : {
830 0 : MutexGuard aGuard( GetLinguMutex() );
831 :
832 0 : if (!bDisposing && rxListener.is())
833 0 : aEvtListeners.removeInterface( rxListener );
834 0 : }
835 :
836 : // Service specific part
837 0 : OUString SAL_CALL Hyphenator::getImplementationName()
838 : throw(RuntimeException, std::exception)
839 : {
840 0 : MutexGuard aGuard( GetLinguMutex() );
841 :
842 0 : return getImplementationName_Static();
843 : }
844 :
845 0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
846 : throw(RuntimeException, std::exception)
847 : {
848 0 : return cppu::supportsService(this, ServiceName);
849 : }
850 :
851 0 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
852 : throw(RuntimeException, std::exception)
853 : {
854 0 : MutexGuard aGuard( GetLinguMutex() );
855 :
856 0 : return getSupportedServiceNames_Static();
857 : }
858 :
859 0 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
860 : throw()
861 : {
862 0 : MutexGuard aGuard( GetLinguMutex() );
863 :
864 0 : Sequence< OUString > aSNS( 1 ); // more than 1 service is possible, too
865 0 : aSNS.getArray()[0] = SN_HYPHENATOR;
866 0 : return aSNS;
867 : }
868 :
869 0 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
870 : XMultiServiceFactory * pServiceManager, void * )
871 : {
872 0 : void * pRet = 0;
873 0 : if ( Hyphenator::getImplementationName_Static().equalsAscii( pImplName ) )
874 : {
875 : Reference< XSingleServiceFactory > xFactory =
876 : cppu::createOneInstanceFactory(
877 : pServiceManager,
878 : Hyphenator::getImplementationName_Static(),
879 : Hyphenator_CreateInstance,
880 0 : Hyphenator::getSupportedServiceNames_Static());
881 : // acquire, because we return an interface pointer instead of a reference
882 0 : xFactory->acquire();
883 0 : pRet = xFactory.get();
884 : }
885 0 : return pRet;
886 : }
887 :
888 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|