Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/uno/Reference.h>
21 : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
22 :
23 : #include <cppuhelper/factory.hxx> // helper for factories
24 : #include <com/sun/star/registry/XRegistryKey.hpp>
25 : #include <i18npool/languagetag.hxx>
26 : #include <tools/debug.hxx>
27 : #include <osl/mutex.hxx>
28 :
29 : #include <hyphen.h>
30 : #include <hyphenimp.hxx>
31 :
32 : #include <linguistic/hyphdta.hxx>
33 : #include <rtl/ustring.hxx>
34 : #include <rtl/ustrbuf.hxx>
35 : #include <rtl/textenc.h>
36 :
37 : #include <linguistic/lngprops.hxx>
38 : #include <linguistic/misc.hxx>
39 : #include <unotools/pathoptions.hxx>
40 : #include <unotools/useroptions.hxx>
41 : #include <unotools/lingucfg.hxx>
42 : #include <osl/file.hxx>
43 :
44 : #include <stdio.h>
45 : #include <string.h>
46 :
47 : #include <list>
48 : #include <set>
49 :
50 : using namespace utl;
51 : using namespace osl;
52 : using namespace com::sun::star;
53 : using namespace com::sun::star::beans;
54 : using namespace com::sun::star::lang;
55 : using namespace com::sun::star::uno;
56 : using namespace com::sun::star::linguistic2;
57 : using namespace linguistic;
58 :
59 : using ::rtl::OUString;
60 :
// Max(a,b): yields the larger of its two arguments.
// Both arguments are parenthesized in the expansion so that operands which
// contain low-precedence operators (?:, &, |, ...) are compared as a whole.
// NOTE: being a macro, the selected argument is evaluated twice; do not pass
// expressions with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
63 :
64 : ///////////////////////////////////////////////////////////////////////////
65 :
66 :
67 0 : Hyphenator::Hyphenator() :
68 0 : aEvtListeners ( GetLinguMutex() )
69 : {
70 0 : bDisposing = false;
71 0 : pPropHelper = NULL;
72 0 : aDicts = NULL;
73 0 : numdict = 0;
74 0 : }
75 :
76 0 : Hyphenator::~Hyphenator()
77 : {
78 0 : if (numdict && aDicts)
79 : {
80 0 : for (int i=0; i < numdict; ++i)
81 : {
82 0 : delete aDicts[i].apCC;
83 0 : if (aDicts[i].aPtr)
84 0 : hnj_hyphen_free(aDicts[i].aPtr);
85 : }
86 : }
87 0 : delete[] aDicts;
88 :
89 0 : if (pPropHelper)
90 : {
91 0 : pPropHelper->RemoveAsPropListener();
92 0 : delete pPropHelper;
93 : }
94 0 : }
95 :
96 0 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
97 : {
98 0 : if (!pPropHelper)
99 : {
100 0 : Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY );
101 :
102 0 : pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
103 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
104 : }
105 0 : return *pPropHelper;
106 : }
107 :
108 :
109 0 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
110 : throw(RuntimeException)
111 : {
112 0 : MutexGuard aGuard( GetLinguMutex() );
113 :
114 : // this routine should return the locales supported by the installed
115 : // dictionaries.
116 :
117 0 : if (!numdict)
118 : {
119 0 : SvtLinguConfig aLinguCfg;
120 :
121 : // get list of dictionaries-to-use
122 : // (or better speaking: the list of dictionaries using the
123 : // new configuration entries).
124 0 : std::list< SvtLinguConfigDictionaryEntry > aDics;
125 0 : uno::Sequence< rtl::OUString > aFormatList;
126 : aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"),
127 0 : A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList );
128 0 : sal_Int32 nLen = aFormatList.getLength();
129 0 : for (sal_Int32 i = 0; i < nLen; ++i)
130 : {
131 : std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
132 0 : aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
133 0 : aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
134 0 : }
135 :
136 : //!! for compatibility with old dictionaries (the ones not using extensions
137 : //!! or new configuration entries, but still using the dictionary.lst file)
138 : //!! Get the list of old style spell checking dictionaries to use...
139 : std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
140 0 : GetOldStyleDics( "HYPH" ) );
141 :
142 : // to prefer dictionaries with configuration entries we will only
143 : // use those old style dictionaries that add a language that
144 : // is not yet supported by the list od new style dictionaries
145 0 : MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
146 :
147 0 : numdict = aDics.size();
148 0 : if (numdict)
149 : {
150 : // get supported locales from the dictionaries-to-use...
151 0 : sal_Int32 k = 0;
152 0 : std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
153 0 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
154 0 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
155 : {
156 0 : uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
157 0 : sal_Int32 nLen2 = aLocaleNames.getLength();
158 0 : for (k = 0; k < nLen2; ++k)
159 : {
160 0 : aLocaleNamesSet.insert( aLocaleNames[k] );
161 : }
162 0 : }
163 : // ... and add them to the resulting sequence
164 0 : aSuppLocales.realloc( aLocaleNamesSet.size() );
165 0 : std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
166 0 : k = 0;
167 0 : for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
168 : {
169 0 : Locale aTmp( LanguageTag( *aItB ).getLocale());
170 0 : aSuppLocales[k++] = aTmp;
171 0 : }
172 :
173 : //! For each dictionary and each locale we need a seperate entry.
174 : //! If this results in more than one dictionary per locale than (for now)
175 : //! it is undefined which dictionary gets used.
176 : //! In the future the implementation should support using several dictionaries
177 : //! for one locale.
178 0 : numdict = 0;
179 0 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
180 0 : numdict = numdict + aDictIt->aLocaleNames.getLength();
181 :
182 : // add dictionary information
183 0 : aDicts = new HDInfo[numdict];
184 :
185 0 : k = 0;
186 0 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
187 : {
188 0 : if (aDictIt->aLocaleNames.getLength() > 0 &&
189 0 : aDictIt->aLocations.getLength() > 0)
190 : {
191 0 : uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
192 0 : sal_Int32 nLocales = aLocaleNames.getLength();
193 :
194 : // currently only one language per dictionary is supported in the actual implementation...
195 : // Thus here we work-around this by adding the same dictionary several times.
196 : // Once for each of it's supported locales.
197 0 : for (sal_Int32 i = 0; i < nLocales; ++i)
198 : {
199 0 : LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
200 0 : aDicts[k].aPtr = NULL;
201 0 : aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
202 0 : aDicts[k].aLoc = aLanguageTag.getLocale();
203 0 : aDicts[k].apCC = new CharClass( aLanguageTag );
204 : // also both files have to be in the same directory and the
205 : // file names must only differ in the extension (.aff/.dic).
206 : // Thus we use the first location only and strip the extension part.
207 0 : rtl::OUString aLocation = aDictIt->aLocations[0];
208 0 : sal_Int32 nPos = aLocation.lastIndexOf( '.' );
209 0 : aLocation = aLocation.copy( 0, nPos );
210 0 : aDicts[k].aName = aLocation;
211 :
212 0 : ++k;
213 0 : }
214 : }
215 : }
216 0 : DBG_ASSERT( k == numdict, "index mismatch?" );
217 : }
218 : else
219 : {
220 : /* no dictionary found so register no dictionaries */
221 0 : numdict = 0;
222 0 : aDicts = NULL;
223 0 : aSuppLocales.realloc(0);
224 0 : }
225 : }
226 :
227 0 : return aSuppLocales;
228 : }
229 :
230 :
231 :
232 0 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
233 : throw(RuntimeException)
234 : {
235 0 : MutexGuard aGuard( GetLinguMutex() );
236 :
237 0 : sal_Bool bRes = sal_False;
238 0 : if (!aSuppLocales.getLength())
239 0 : getLocales();
240 :
241 0 : const Locale *pLocale = aSuppLocales.getConstArray();
242 0 : sal_Int32 nLen = aSuppLocales.getLength();
243 0 : for (sal_Int32 i = 0; i < nLen; ++i)
244 : {
245 0 : if (rLocale == pLocale[i])
246 : {
247 0 : bRes = sal_True;
248 0 : break;
249 : }
250 : }
251 0 : return bRes;
252 : }
253 :
254 :
255 0 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
256 : const ::com::sun::star::lang::Locale& aLocale,
257 : sal_Int16 nMaxLeading,
258 : const ::com::sun::star::beans::PropertyValues& aProperties )
259 : throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
260 : {
261 0 : int nHyphenationPos = -1;
262 0 : int nHyphenationPosAlt = -1;
263 0 : int nHyphenationPosAltHyph = -1;
264 : int wordlen;
265 : char *hyphens;
266 : char *lcword;
267 0 : int k = 0;
268 :
269 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
270 0 : rHelper.SetTmpPropVals(aProperties);
271 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
272 0 : sal_Int16 minLead = rHelper.GetMinLeading();
273 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
274 :
275 0 : HyphenDict *dict = NULL;
276 0 : rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
277 0 : CharClass * pCC = NULL;
278 :
279 0 : Reference< XHyphenatedWord > xRes;
280 :
281 0 : k = -1;
282 0 : for (int j = 0; j < numdict; j++)
283 : {
284 0 : if (aLocale == aDicts[j].aLoc)
285 0 : k = j;
286 : }
287 :
288 : // if we have a hyphenation dictionary matching this locale
289 0 : if (k != -1)
290 : {
291 : // if this dictinary has not been loaded yet do that
292 0 : if (!aDicts[k].aPtr)
293 : {
294 0 : OUString DictFN = aDicts[k].aName + A2OU(".dic");
295 0 : OUString dictpath;
296 :
297 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
298 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
299 :
300 : #if defined(WNT)
301 : // workaround for Windows specifc problem that the
302 : // path length in calls to 'fopen' is limted to somewhat
303 : // about 120+ characters which will usually be exceed when
304 : // using dictionaries as extensions.
305 : sTmp = Win_GetShortPathName( dictpath );
306 : #endif
307 :
308 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
309 : {
310 0 : fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
311 0 : return NULL;
312 : }
313 0 : aDicts[k].aPtr = dict;
314 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
315 : }
316 :
317 : // other wise hyphenate the word with that dictionary
318 0 : dict = aDicts[k].aPtr;
319 0 : eEnc = aDicts[k].eEnc;
320 0 : pCC = aDicts[k].apCC;
321 :
322 : // we don't want to work with a default text encoding since following incorrect
323 : // results may occur only for specific text and thus may be hard to notice.
324 : // Thus better always make a clean exit here if the text encoding is in question.
325 : // Hopefully something not working at all will raise proper attention quickly. ;-)
326 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
327 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
328 0 : return NULL;
329 :
330 0 : sal_uInt16 ct = CAPTYPE_UNKNOWN;
331 0 : ct = capitalType(aWord, pCC);
332 :
333 : // first convert any smart quotes or apostrophes to normal ones
334 0 : OUStringBuffer rBuf(aWord);
335 0 : sal_Int32 nc = rBuf.getLength();
336 : sal_Unicode ch;
337 0 : for (sal_Int32 ix=0; ix < nc; ix++)
338 : {
339 0 : ch = rBuf[ix];
340 0 : if ((ch == 0x201C) || (ch == 0x201D))
341 0 : rBuf[ix] = (sal_Unicode)0x0022;
342 0 : if ((ch == 0x2018) || (ch == 0x2019))
343 0 : rBuf[ix] = (sal_Unicode)0x0027;
344 : }
345 0 : OUString nWord(rBuf.makeStringAndClear());
346 :
347 : // now convert word to all lowercase for pattern recognition
348 0 : OUString nTerm(makeLowerCase(nWord, pCC));
349 :
350 : // now convert word to needed encoding
351 0 : OString encWord(OU2ENC(nTerm,eEnc));
352 :
353 0 : wordlen = encWord.getLength();
354 0 : lcword = new char[wordlen + 1];
355 0 : hyphens = new char[wordlen + 5];
356 :
357 0 : char ** rep = NULL; // replacements of discretionary hyphenation
358 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
359 0 : int * cut = NULL; // length of deletions in original word
360 :
361 : // copy converted word into simple char buffer
362 0 : strcpy(lcword,encWord.getStr());
363 :
364 : // now strip off any ending periods
365 0 : int n = wordlen-1;
366 0 : while((n >=0) && (lcword[n] == '.'))
367 0 : n--;
368 0 : n++;
369 0 : if (n > 0)
370 : {
371 : const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
372 : &rep, &pos, &cut, minLead, minTrail,
373 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
374 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
375 0 : if (bFailed)
376 : {
377 : //whoops something did not work
378 0 : delete[] hyphens;
379 0 : delete[] lcword;
380 0 : if (rep)
381 : {
382 0 : for(int j = 0; j < n; j++)
383 : {
384 0 : if (rep[j]) free(rep[j]);
385 : }
386 0 : free(rep);
387 : }
388 0 : if (pos) free(pos);
389 0 : if (cut) free(cut);
390 0 : return NULL;
391 : }
392 : }
393 :
394 : // now backfill hyphens[] for any removed trailing periods
395 0 : for (int c = n; c < wordlen; c++) hyphens[c] = '0';
396 0 : hyphens[wordlen] = '\0';
397 :
398 0 : sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
399 :
400 0 : for (sal_Int32 i = 0; i < n; i++)
401 : {
402 0 : int leftrep = 0;
403 0 : sal_Bool hit = (n >= minLen);
404 0 : if (!rep || !rep[i] || (i >= n))
405 : {
406 0 : hit = hit && (hyphens[i]&1) && (i < Leading);
407 0 : hit = hit && (i >= (minLead-1) );
408 0 : hit = hit && ((n - i - 1) >= minTrail);
409 : }
410 : else
411 : {
412 : // calculate change character length before hyphenation point signed with '='
413 0 : for (char * c = rep[i]; *c && (*c != '='); c++)
414 : {
415 0 : if (eEnc == RTL_TEXTENCODING_UTF8)
416 : {
417 0 : if (((unsigned char) *c) >> 6 != 2)
418 0 : leftrep++;
419 : }
420 : else
421 0 : leftrep++;
422 : }
423 0 : hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
424 0 : hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
425 0 : hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
426 : }
427 0 : if (hit)
428 : {
429 0 : nHyphenationPos = i;
430 0 : if (rep && (i < n) && rep[i])
431 : {
432 0 : nHyphenationPosAlt = i - pos[i];
433 0 : nHyphenationPosAltHyph = i + leftrep - pos[i];
434 : }
435 : }
436 : }
437 :
438 0 : if (nHyphenationPos == -1)
439 : {
440 0 : xRes = NULL;
441 : }
442 : else
443 : {
444 0 : if (rep && rep[nHyphenationPos])
445 : {
446 : // remove equal sign
447 0 : char * s = rep[nHyphenationPos];
448 0 : int eq = 0;
449 0 : for (; *s; s++)
450 : {
451 0 : if (*s == '=') eq = 1;
452 0 : if (eq) *s = *(s + 1);
453 : }
454 0 : OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
455 0 : OUString repHyph;
456 0 : switch (ct)
457 : {
458 : case CAPTYPE_ALLCAP:
459 : {
460 0 : repHyph = makeUpperCase(repHyphlow, pCC);
461 0 : break;
462 : }
463 : case CAPTYPE_INITCAP:
464 : {
465 0 : if (nHyphenationPosAlt == -1)
466 0 : repHyph = makeInitCap(repHyphlow, pCC);
467 : else
468 0 : repHyph = repHyphlow;
469 0 : break;
470 : }
471 : default:
472 : {
473 0 : repHyph = repHyphlow;
474 0 : break;
475 : }
476 : }
477 :
478 : // handle shortening
479 : sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
480 0 : nHyphenationPosAltHyph : nHyphenationPos);
481 : // dicretionary hyphenation
482 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LanguageTag( aLocale ).getLanguageType(), nPos,
483 0 : aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
484 0 : (sal_Int16) nHyphenationPosAltHyph);
485 : }
486 : else
487 : {
488 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LanguageTag( aLocale ).getLanguageType(),
489 0 : (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
490 : }
491 : }
492 :
493 0 : delete[] lcword;
494 0 : delete[] hyphens;
495 0 : if (rep)
496 : {
497 0 : for(int j = 0; j < n; j++)
498 : {
499 0 : if (rep[j]) free(rep[j]);
500 : }
501 0 : free(rep);
502 : }
503 0 : if (pos) free(pos);
504 0 : if (cut) free(cut);
505 0 : return xRes;
506 : }
507 0 : return NULL;
508 : }
509 :
510 :
511 0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
512 : const ::rtl::OUString& /*aWord*/,
513 : const ::com::sun::star::lang::Locale& /*aLocale*/,
514 : sal_Int16 /*nIndex*/,
515 : const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
516 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
517 : {
518 : /* alternative spelling isn't supported by tex dictionaries */
519 : /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
520 : /* TASK: implement queryAlternativeSpelling() */
521 0 : return NULL;
522 : }
523 :
524 0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
525 : const ::com::sun::star::lang::Locale& aLocale,
526 : const ::com::sun::star::beans::PropertyValues& aProperties )
527 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
528 : {
529 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
530 0 : rHelper.SetTmpPropVals(aProperties);
531 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
532 0 : sal_Int16 minLead = rHelper.GetMinLeading();
533 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
534 :
535 : //Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
536 : //well as "hyphenate"
537 0 : if (aWord.getLength() < minLen)
538 : {
539 0 : return PossibleHyphens::CreatePossibleHyphens( aWord, LanguageTag( aLocale ).getLanguageType(),
540 0 : aWord, Sequence< sal_Int16 >() );
541 : }
542 :
543 0 : int k = -1;
544 0 : for (int j = 0; j < numdict; j++)
545 : {
546 0 : if (aLocale == aDicts[j].aLoc) k = j;
547 : }
548 :
549 : // if we have a hyphenation dictionary matching this locale
550 0 : if (k != -1)
551 : {
552 0 : HyphenDict *dict = NULL;
553 : // if this dictioanry has not been loaded yet do that
554 0 : if (!aDicts[k].aPtr)
555 : {
556 0 : OUString DictFN = aDicts[k].aName + A2OU(".dic");
557 0 : OUString dictpath;
558 :
559 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
560 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
561 :
562 : #if defined(WNT)
563 : // workaround for Windows specifc problem that the
564 : // path length in calls to 'fopen' is limted to somewhat
565 : // about 120+ characters which will usually be exceed when
566 : // using dictionaries as extensions.
567 : sTmp = Win_GetShortPathName( dictpath );
568 : #endif
569 :
570 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
571 : {
572 0 : fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
573 0 : return NULL;
574 : }
575 0 : aDicts[k].aPtr = dict;
576 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
577 : }
578 :
579 : // other wise hyphenate the word with that dictionary
580 0 : dict = aDicts[k].aPtr;
581 0 : rtl_TextEncoding eEnc = aDicts[k].eEnc;
582 0 : CharClass* pCC = aDicts[k].apCC;
583 :
584 : // we don't want to work with a default text encoding since following incorrect
585 : // results may occur only for specific text and thus may be hard to notice.
586 : // Thus better always make a clean exit here if the text encoding is in question.
587 : // Hopefully something not working at all will raise proper attention quickly. ;-)
588 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
589 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
590 0 : return NULL;
591 :
592 : // first handle smart quotes both single and double
593 0 : OUStringBuffer rBuf(aWord);
594 0 : sal_Int32 nc = rBuf.getLength();
595 : sal_Unicode ch;
596 0 : for (sal_Int32 ix=0; ix < nc; ix++)
597 : {
598 0 : ch = rBuf[ix];
599 0 : if ((ch == 0x201C) || (ch == 0x201D))
600 0 : rBuf[ix] = (sal_Unicode)0x0022;
601 0 : if ((ch == 0x2018) || (ch == 0x2019))
602 0 : rBuf[ix] = (sal_Unicode)0x0027;
603 : }
604 0 : OUString nWord(rBuf.makeStringAndClear());
605 :
606 : // now convert word to all lowercase for pattern recognition
607 0 : OUString nTerm(makeLowerCase(nWord, pCC));
608 :
609 : // now convert word to needed encoding
610 0 : OString encWord(OU2ENC(nTerm,eEnc));
611 :
612 0 : int wordlen = encWord.getLength();
613 0 : char *lcword = new char[wordlen+1];
614 0 : char *hyphens = new char[wordlen+5];
615 0 : char ** rep = NULL; // replacements of discretionary hyphenation
616 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
617 0 : int * cut = NULL; // length of deletions in original word
618 :
619 : // copy converted word into simple char buffer
620 0 : strcpy(lcword,encWord.getStr());
621 :
622 : // first remove any trailing periods
623 0 : int n = wordlen-1;
624 0 : while((n >=0) && (lcword[n] == '.'))
625 0 : n--;
626 0 : n++;
627 0 : if (n > 0)
628 : {
629 : const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
630 : &rep, &pos, &cut, minLead, minTrail,
631 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
632 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
633 0 : if (bFailed)
634 : {
635 0 : delete[] hyphens;
636 0 : delete[] lcword;
637 :
638 0 : if (rep)
639 : {
640 0 : for(int j = 0; j < n; j++)
641 : {
642 0 : if (rep[j]) free(rep[j]);
643 : }
644 0 : free(rep);
645 : }
646 0 : if (pos) free(pos);
647 0 : if (cut) free(cut);
648 :
649 0 : return NULL;
650 : }
651 : }
652 : // now backfill hyphens[] for any removed periods
653 0 : for (int c = n; c < wordlen; c++)
654 0 : hyphens[c] = '0';
655 0 : hyphens[wordlen] = '\0';
656 :
657 0 : sal_Int16 nHyphCount = 0;
658 : sal_Int16 i;
659 :
660 0 : for ( i = 0; i < encWord.getLength(); i++)
661 : {
662 0 : if (hyphens[i]&1 && (!rep || !rep[i]))
663 0 : nHyphCount++;
664 : }
665 :
666 0 : Sequence< sal_Int16 > aHyphPos(nHyphCount);
667 0 : sal_Int16 *pPos = aHyphPos.getArray();
668 0 : OUStringBuffer hyphenatedWordBuffer;
669 0 : nHyphCount = 0;
670 :
671 0 : for (i = 0; i < nWord.getLength(); i++)
672 : {
673 0 : hyphenatedWordBuffer.append(aWord[i]);
674 : // hyphenation position (not alternative)
675 0 : if (hyphens[i]&1 && (!rep || !rep[i]))
676 : {
677 0 : pPos[nHyphCount] = i;
678 0 : hyphenatedWordBuffer.append(sal_Unicode('='));
679 0 : nHyphCount++;
680 : }
681 : }
682 :
683 0 : OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
684 :
685 : Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
686 0 : aWord, LanguageTag( aLocale ).getLanguageType(), hyphenatedWord, aHyphPos);
687 :
688 0 : delete[] hyphens;
689 0 : delete[] lcword;
690 :
691 0 : if (rep)
692 : {
693 0 : for(int j = 0; j < n; j++)
694 : {
695 0 : if (rep[j]) free(rep[j]);
696 : }
697 0 : free(rep);
698 : }
699 0 : if (pos) free(pos);
700 0 : if (cut) free(cut);
701 :
702 0 : return xRes;
703 : }
704 :
705 0 : return NULL;
706 : }
707 :
708 0 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
709 : {
710 0 : if (pCC)
711 0 : return pCC->lowercase(aTerm);
712 0 : return aTerm;
713 : }
714 :
715 0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
716 : {
717 0 : if (pCC)
718 0 : return pCC->uppercase(aTerm);
719 0 : return aTerm;
720 : }
721 :
722 :
723 0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
724 : {
725 0 : sal_Int32 tlen = aTerm.getLength();
726 0 : if ((pCC) && (tlen))
727 : {
728 0 : OUString bTemp = aTerm.copy(0,1);
729 0 : if (tlen > 1)
730 0 : return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
731 :
732 0 : return pCC->uppercase(bTemp, 0, 1);
733 : }
734 0 : return aTerm;
735 : }
736 :
737 :
738 0 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
739 : const Reference< XMultiServiceFactory > & /*rSMgr*/ )
740 : throw(Exception)
741 : {
742 0 : Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
743 0 : return xService;
744 : }
745 :
746 :
747 0 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
748 : const Reference< XLinguServiceEventListener >& rxLstnr )
749 : throw(RuntimeException)
750 : {
751 0 : MutexGuard aGuard( GetLinguMutex() );
752 :
753 0 : sal_Bool bRes = sal_False;
754 0 : if (!bDisposing && rxLstnr.is())
755 : {
756 0 : bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
757 : }
758 0 : return bRes;
759 : }
760 :
761 :
762 0 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
763 : const Reference< XLinguServiceEventListener >& rxLstnr )
764 : throw(RuntimeException)
765 : {
766 0 : MutexGuard aGuard( GetLinguMutex() );
767 :
768 0 : sal_Bool bRes = sal_False;
769 0 : if (!bDisposing && rxLstnr.is())
770 : {
771 0 : bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
772 : }
773 0 : return bRes;
774 : }
775 :
776 :
777 0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
778 : throw(RuntimeException)
779 : {
780 0 : MutexGuard aGuard( GetLinguMutex() );
781 0 : return A2OU( "Libhyphen Hyphenator" );
782 : }
783 :
784 :
785 0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
786 : throw(Exception, RuntimeException)
787 : {
788 0 : MutexGuard aGuard( GetLinguMutex() );
789 :
790 0 : if (!pPropHelper)
791 : {
792 0 : sal_Int32 nLen = rArguments.getLength();
793 0 : if (2 == nLen)
794 : {
795 0 : Reference< XPropertySet > xPropSet;
796 0 : rArguments.getConstArray()[0] >>= xPropSet;
797 : //rArguments.getConstArray()[1] >>= xDicList;
798 :
799 : //! Pointer allows for access of the non-UNO functions.
800 : //! And the reference to the UNO-functions while increasing
801 : //! the ref-count and will implicitly free the memory
802 : //! when the object is not longer used.
803 0 : pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
804 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
805 : }
806 : else {
807 : OSL_FAIL( "wrong number of arguments in sequence" );
808 : }
809 0 : }
810 0 : }
811 :
812 :
813 0 : void SAL_CALL Hyphenator::dispose()
814 : throw(RuntimeException)
815 : {
816 0 : MutexGuard aGuard( GetLinguMutex() );
817 :
818 0 : if (!bDisposing)
819 : {
820 0 : bDisposing = true;
821 0 : EventObject aEvtObj( (XHyphenator *) this );
822 0 : aEvtListeners.disposeAndClear( aEvtObj );
823 0 : if (pPropHelper)
824 : {
825 0 : pPropHelper->RemoveAsPropListener();
826 0 : delete pPropHelper;
827 0 : pPropHelper = NULL;
828 0 : }
829 0 : }
830 0 : }
831 :
832 :
833 0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
834 : throw(RuntimeException)
835 : {
836 0 : MutexGuard aGuard( GetLinguMutex() );
837 :
838 0 : if (!bDisposing && rxListener.is())
839 0 : aEvtListeners.addInterface( rxListener );
840 0 : }
841 :
842 :
843 0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
844 : throw(RuntimeException)
845 : {
846 0 : MutexGuard aGuard( GetLinguMutex() );
847 :
848 0 : if (!bDisposing && rxListener.is())
849 0 : aEvtListeners.removeInterface( rxListener );
850 0 : }
851 :
852 :
853 : ///////////////////////////////////////////////////////////////////////////
854 : // Service specific part
855 : //
856 :
857 0 : OUString SAL_CALL Hyphenator::getImplementationName()
858 : throw(RuntimeException)
859 : {
860 0 : MutexGuard aGuard( GetLinguMutex() );
861 :
862 0 : return getImplementationName_Static();
863 : }
864 :
865 :
866 0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
867 : throw(RuntimeException)
868 : {
869 0 : MutexGuard aGuard( GetLinguMutex() );
870 :
871 0 : Sequence< OUString > aSNL = getSupportedServiceNames();
872 0 : const OUString * pArray = aSNL.getConstArray();
873 0 : for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
874 0 : if( pArray[i] == ServiceName )
875 0 : return sal_True;
876 0 : return sal_False;
877 : }
878 :
879 :
880 0 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
881 : throw(RuntimeException)
882 : {
883 0 : MutexGuard aGuard( GetLinguMutex() );
884 :
885 0 : return getSupportedServiceNames_Static();
886 : }
887 :
888 :
889 0 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
890 : throw()
891 : {
892 0 : MutexGuard aGuard( GetLinguMutex() );
893 :
894 0 : Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich
895 0 : aSNS.getArray()[0] = A2OU( SN_HYPHENATOR );
896 0 : return aSNS;
897 : }
898 :
899 0 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
900 : XMultiServiceFactory * pServiceManager, void * )
901 : {
902 0 : void * pRet = 0;
903 0 : if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
904 : {
905 : Reference< XSingleServiceFactory > xFactory =
906 : cppu::createOneInstanceFactory(
907 : pServiceManager,
908 : Hyphenator::getImplementationName_Static(),
909 : Hyphenator_CreateInstance,
910 0 : Hyphenator::getSupportedServiceNames_Static());
911 : // acquire, because we return an interface pointer instead of a reference
912 0 : xFactory->acquire();
913 0 : pRet = xFactory.get();
914 : }
915 0 : return pRet;
916 : }
917 :
918 :
919 : ///////////////////////////////////////////////////////////////////////////
920 :
921 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|