Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/uno/Reference.h>
21 : #include <com/sun/star/linguistic2/XSearchableDictionaryList.hpp>
22 :
23 : #include <cppuhelper/factory.hxx> // helper for factories
24 : #include <com/sun/star/registry/XRegistryKey.hpp>
25 : #include <i18npool/languagetag.hxx>
26 : #include <tools/debug.hxx>
27 : #include <osl/mutex.hxx>
28 :
29 : #include <hyphen.h>
30 : #include <hyphenimp.hxx>
31 :
32 : #include <linguistic/hyphdta.hxx>
33 : #include <rtl/ustring.hxx>
34 : #include <rtl/ustrbuf.hxx>
35 : #include <rtl/textenc.h>
36 :
37 : #include <linguistic/lngprops.hxx>
38 : #include <unotools/pathoptions.hxx>
39 : #include <unotools/useroptions.hxx>
40 : #include <unotools/lingucfg.hxx>
41 : #include <osl/file.hxx>
42 :
43 : #include <stdio.h>
44 : #include <string.h>
45 :
46 : #include <list>
47 : #include <set>
48 :
49 : using namespace utl;
50 : using namespace osl;
51 : using namespace com::sun::star;
52 : using namespace com::sun::star::beans;
53 : using namespace com::sun::star::lang;
54 : using namespace com::sun::star::uno;
55 : using namespace com::sun::star::linguistic2;
56 : using namespace linguistic;
57 :
58 : using ::rtl::OUString;
59 :
// values assigned to capitalization types (returned by Hyphenator::capitalType)
#define CAPTYPE_UNKNOWN 0
#define CAPTYPE_NOCAP 1
#define CAPTYPE_INITCAP 2
#define CAPTYPE_ALLCAP 3
#define CAPTYPE_MIXED 4

// Larger of two values.
// Both arguments are fully parenthesized so that expressions containing
// operators of lower precedence than '>' (e.g. '?:' or '&') are compared as
// a whole. Note that each argument is evaluated twice, so do not pass
// expressions with side effects.
#define Max(a,b) ((a) > (b) ? (a) : (b))
69 :
70 : ///////////////////////////////////////////////////////////////////////////
71 :
72 :
73 1 : Hyphenator::Hyphenator() :
74 1 : aEvtListeners ( GetLinguMutex() )
75 : {
76 1 : bDisposing = false;
77 1 : pPropHelper = NULL;
78 1 : aDicts = NULL;
79 1 : numdict = 0;
80 1 : }
81 :
82 3 : Hyphenator::~Hyphenator()
83 : {
84 1 : if (numdict && aDicts)
85 : {
86 44 : for (int i=0; i < numdict; ++i)
87 : {
88 43 : delete aDicts[i].apCC;
89 43 : if (aDicts[i].aPtr)
90 0 : hnj_hyphen_free(aDicts[i].aPtr);
91 : }
92 : }
93 1 : delete[] aDicts;
94 :
95 1 : if (pPropHelper)
96 : {
97 0 : pPropHelper->RemoveAsPropListener();
98 0 : delete pPropHelper;
99 : }
100 2 : }
101 :
102 0 : PropertyHelper_Hyphenation& Hyphenator::GetPropHelper_Impl()
103 : {
104 0 : if (!pPropHelper)
105 : {
106 0 : Reference< XPropertySet > xPropSet( GetLinguProperties(), UNO_QUERY );
107 :
108 0 : pPropHelper = new PropertyHelper_Hyphenation ((XHyphenator *) this, xPropSet );
109 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
110 : }
111 0 : return *pPropHelper;
112 : }
113 :
114 :
115 2 : Sequence< Locale > SAL_CALL Hyphenator::getLocales()
116 : throw(RuntimeException)
117 : {
118 2 : MutexGuard aGuard( GetLinguMutex() );
119 :
120 : // this routine should return the locales supported by the installed
121 : // dictionaries.
122 :
123 2 : if (!numdict)
124 : {
125 1 : SvtLinguConfig aLinguCfg;
126 :
127 : // get list of dictionaries-to-use
128 : // (or better speaking: the list of dictionaries using the
129 : // new configuration entries).
130 1 : std::list< SvtLinguConfigDictionaryEntry > aDics;
131 1 : uno::Sequence< rtl::OUString > aFormatList;
132 : aLinguCfg.GetSupportedDictionaryFormatsFor( A2OU("Hyphenators"),
133 1 : A2OU("org.openoffice.lingu.LibHnjHyphenator"), aFormatList );
134 1 : sal_Int32 nLen = aFormatList.getLength();
135 2 : for (sal_Int32 i = 0; i < nLen; ++i)
136 : {
137 : std::vector< SvtLinguConfigDictionaryEntry > aTmpDic(
138 1 : aLinguCfg.GetActiveDictionariesByFormat( aFormatList[i] ) );
139 1 : aDics.insert( aDics.end(), aTmpDic.begin(), aTmpDic.end() );
140 1 : }
141 :
142 : //!! for compatibility with old dictionaries (the ones not using extensions
143 : //!! or new configuration entries, but still using the dictionary.lst file)
144 : //!! Get the list of old style spell checking dictionaries to use...
145 : std::vector< SvtLinguConfigDictionaryEntry > aOldStyleDics(
146 1 : GetOldStyleDics( "HYPH" ) );
147 :
148 : // to prefer dictionaries with configuration entries we will only
149 : // use those old style dictionaries that add a language that
150 : // is not yet supported by the list od new style dictionaries
151 1 : MergeNewStyleDicsAndOldStyleDics( aDics, aOldStyleDics );
152 :
153 1 : numdict = aDics.size();
154 1 : if (numdict)
155 : {
156 : // get supported locales from the dictionaries-to-use...
157 1 : sal_Int32 k = 0;
158 1 : std::set< rtl::OUString, lt_rtl_OUString > aLocaleNamesSet;
159 1 : std::list< SvtLinguConfigDictionaryEntry >::const_iterator aDictIt;
160 5 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
161 : {
162 4 : uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
163 4 : sal_Int32 nLen2 = aLocaleNames.getLength();
164 47 : for (k = 0; k < nLen2; ++k)
165 : {
166 43 : aLocaleNamesSet.insert( aLocaleNames[k] );
167 : }
168 4 : }
169 : // ... and add them to the resulting sequence
170 1 : aSuppLocales.realloc( aLocaleNamesSet.size() );
171 1 : std::set< rtl::OUString, lt_rtl_OUString >::const_iterator aItB;
172 1 : k = 0;
173 44 : for (aItB = aLocaleNamesSet.begin(); aItB != aLocaleNamesSet.end(); ++aItB)
174 : {
175 43 : Locale aTmp( LanguageTag( *aItB ).getLocale());
176 43 : aSuppLocales[k++] = aTmp;
177 43 : }
178 :
179 : //! For each dictionary and each locale we need a seperate entry.
180 : //! If this results in more than one dictionary per locale than (for now)
181 : //! it is undefined which dictionary gets used.
182 : //! In the future the implementation should support using several dictionaries
183 : //! for one locale.
184 1 : numdict = 0;
185 5 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
186 4 : numdict = numdict + aDictIt->aLocaleNames.getLength();
187 :
188 : // add dictionary information
189 1 : aDicts = new HDInfo[numdict];
190 :
191 1 : k = 0;
192 5 : for (aDictIt = aDics.begin(); aDictIt != aDics.end(); ++aDictIt)
193 : {
194 8 : if (aDictIt->aLocaleNames.getLength() > 0 &&
195 4 : aDictIt->aLocations.getLength() > 0)
196 : {
197 4 : uno::Sequence< rtl::OUString > aLocaleNames( aDictIt->aLocaleNames );
198 4 : sal_Int32 nLocales = aLocaleNames.getLength();
199 :
200 : // currently only one language per dictionary is supported in the actual implementation...
201 : // Thus here we work-around this by adding the same dictionary several times.
202 : // Once for each of it's supported locales.
203 47 : for (sal_Int32 i = 0; i < nLocales; ++i)
204 : {
205 43 : LanguageTag aLanguageTag( aDictIt->aLocaleNames[i] );
206 43 : aDicts[k].aPtr = NULL;
207 43 : aDicts[k].eEnc = RTL_TEXTENCODING_DONTKNOW;
208 43 : aDicts[k].aLoc = aLanguageTag.getLocale();
209 43 : aDicts[k].apCC = new CharClass( aLanguageTag );
210 : // also both files have to be in the same directory and the
211 : // file names must only differ in the extension (.aff/.dic).
212 : // Thus we use the first location only and strip the extension part.
213 43 : rtl::OUString aLocation = aDictIt->aLocations[0];
214 43 : sal_Int32 nPos = aLocation.lastIndexOf( '.' );
215 43 : aLocation = aLocation.copy( 0, nPos );
216 43 : aDicts[k].aName = aLocation;
217 :
218 43 : ++k;
219 47 : }
220 : }
221 : }
222 1 : DBG_ASSERT( k == numdict, "index mismatch?" );
223 : }
224 : else
225 : {
226 : /* no dictionary found so register no dictionaries */
227 0 : numdict = 0;
228 0 : aDicts = NULL;
229 0 : aSuppLocales.realloc(0);
230 1 : }
231 : }
232 :
233 2 : return aSuppLocales;
234 : }
235 :
236 :
237 :
238 0 : sal_Bool SAL_CALL Hyphenator::hasLocale(const Locale& rLocale)
239 : throw(RuntimeException)
240 : {
241 0 : MutexGuard aGuard( GetLinguMutex() );
242 :
243 0 : sal_Bool bRes = sal_False;
244 0 : if (!aSuppLocales.getLength())
245 0 : getLocales();
246 :
247 0 : const Locale *pLocale = aSuppLocales.getConstArray();
248 0 : sal_Int32 nLen = aSuppLocales.getLength();
249 0 : for (sal_Int32 i = 0; i < nLen; ++i)
250 : {
251 0 : if (rLocale == pLocale[i])
252 : {
253 0 : bRes = sal_True;
254 0 : break;
255 : }
256 : }
257 0 : return bRes;
258 : }
259 :
260 :
261 0 : Reference< XHyphenatedWord > SAL_CALL Hyphenator::hyphenate( const ::rtl::OUString& aWord,
262 : const ::com::sun::star::lang::Locale& aLocale,
263 : sal_Int16 nMaxLeading,
264 : const ::com::sun::star::beans::PropertyValues& aProperties )
265 : throw (com::sun::star::uno::RuntimeException, com::sun::star::lang::IllegalArgumentException)
266 : {
267 0 : int nHyphenationPos = -1;
268 0 : int nHyphenationPosAlt = -1;
269 0 : int nHyphenationPosAltHyph = -1;
270 : int wordlen;
271 : char *hyphens;
272 : char *lcword;
273 0 : int k = 0;
274 :
275 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
276 0 : rHelper.SetTmpPropVals(aProperties);
277 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
278 0 : sal_Int16 minLead = rHelper.GetMinLeading();
279 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
280 :
281 0 : HyphenDict *dict = NULL;
282 0 : rtl_TextEncoding eEnc = RTL_TEXTENCODING_DONTKNOW;
283 0 : CharClass * pCC = NULL;
284 :
285 0 : Reference< XHyphenatedWord > xRes;
286 :
287 0 : k = -1;
288 0 : for (int j = 0; j < numdict; j++)
289 : {
290 0 : if (aLocale == aDicts[j].aLoc)
291 0 : k = j;
292 : }
293 :
294 : // if we have a hyphenation dictionary matching this locale
295 0 : if (k != -1)
296 : {
297 : // if this dictinary has not been loaded yet do that
298 0 : if (!aDicts[k].aPtr)
299 : {
300 0 : OUString DictFN = aDicts[k].aName + A2OU(".dic");
301 0 : OUString dictpath;
302 :
303 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
304 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
305 :
306 : #if defined(WNT)
307 : // workaround for Windows specifc problem that the
308 : // path length in calls to 'fopen' is limted to somewhat
309 : // about 120+ characters which will usually be exceed when
310 : // using dictionaries as extensions.
311 : sTmp = Win_GetShortPathName( dictpath );
312 : #endif
313 :
314 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
315 : {
316 0 : fprintf(stderr, "Couldn't find file %s\n", OU2ENC(dictpath, osl_getThreadTextEncoding()) );
317 0 : return NULL;
318 : }
319 0 : aDicts[k].aPtr = dict;
320 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
321 : }
322 :
323 : // other wise hyphenate the word with that dictionary
324 0 : dict = aDicts[k].aPtr;
325 0 : eEnc = aDicts[k].eEnc;
326 0 : pCC = aDicts[k].apCC;
327 :
328 : // we don't want to work with a default text encoding since following incorrect
329 : // results may occur only for specific text and thus may be hard to notice.
330 : // Thus better always make a clean exit here if the text encoding is in question.
331 : // Hopefully something not working at all will raise proper attention quickly. ;-)
332 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
333 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
334 0 : return NULL;
335 :
336 0 : sal_uInt16 ct = CAPTYPE_UNKNOWN;
337 0 : ct = capitalType(aWord, pCC);
338 :
339 : // first convert any smart quotes or apostrophes to normal ones
340 0 : OUStringBuffer rBuf(aWord);
341 0 : sal_Int32 nc = rBuf.getLength();
342 : sal_Unicode ch;
343 0 : for (sal_Int32 ix=0; ix < nc; ix++)
344 : {
345 0 : ch = rBuf[ix];
346 0 : if ((ch == 0x201C) || (ch == 0x201D))
347 0 : rBuf[ix] = (sal_Unicode)0x0022;
348 0 : if ((ch == 0x2018) || (ch == 0x2019))
349 0 : rBuf[ix] = (sal_Unicode)0x0027;
350 : }
351 0 : OUString nWord(rBuf.makeStringAndClear());
352 :
353 : // now convert word to all lowercase for pattern recognition
354 0 : OUString nTerm(makeLowerCase(nWord, pCC));
355 :
356 : // now convert word to needed encoding
357 0 : OString encWord(OU2ENC(nTerm,eEnc));
358 :
359 0 : wordlen = encWord.getLength();
360 0 : lcword = new char[wordlen + 1];
361 0 : hyphens = new char[wordlen + 5];
362 :
363 0 : char ** rep = NULL; // replacements of discretionary hyphenation
364 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
365 0 : int * cut = NULL; // length of deletions in original word
366 :
367 : // copy converted word into simple char buffer
368 0 : strcpy(lcword,encWord.getStr());
369 :
370 : // now strip off any ending periods
371 0 : int n = wordlen-1;
372 0 : while((n >=0) && (lcword[n] == '.'))
373 0 : n--;
374 0 : n++;
375 0 : if (n > 0)
376 : {
377 : const bool bFailed = 0 != hnj_hyphen_hyphenate3( dict, lcword, n, hyphens, NULL,
378 : &rep, &pos, &cut, minLead, minTrail,
379 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
380 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
381 0 : if (bFailed)
382 : {
383 : //whoops something did not work
384 0 : delete[] hyphens;
385 0 : delete[] lcword;
386 0 : if (rep)
387 : {
388 0 : for(int j = 0; j < n; j++)
389 : {
390 0 : if (rep[j]) free(rep[j]);
391 : }
392 0 : free(rep);
393 : }
394 0 : if (pos) free(pos);
395 0 : if (cut) free(cut);
396 0 : return NULL;
397 : }
398 : }
399 :
400 : // now backfill hyphens[] for any removed trailing periods
401 0 : for (int c = n; c < wordlen; c++) hyphens[c] = '0';
402 0 : hyphens[wordlen] = '\0';
403 :
404 0 : sal_Int32 Leading = GetPosInWordToCheck( aWord, nMaxLeading );
405 :
406 0 : for (sal_Int32 i = 0; i < n; i++)
407 : {
408 0 : int leftrep = 0;
409 0 : sal_Bool hit = (n >= minLen);
410 0 : if (!rep || !rep[i] || (i >= n))
411 : {
412 0 : hit = hit && (hyphens[i]&1) && (i < Leading);
413 0 : hit = hit && (i >= (minLead-1) );
414 0 : hit = hit && ((n - i - 1) >= minTrail);
415 : }
416 : else
417 : {
418 : // calculate change character length before hyphenation point signed with '='
419 0 : for (char * c = rep[i]; *c && (*c != '='); c++)
420 : {
421 0 : if (eEnc == RTL_TEXTENCODING_UTF8)
422 : {
423 0 : if (((unsigned char) *c) >> 6 != 2)
424 0 : leftrep++;
425 : }
426 : else
427 0 : leftrep++;
428 : }
429 0 : hit = hit && (hyphens[i]&1) && ((i + leftrep - pos[i]) < Leading);
430 0 : hit = hit && ((i + leftrep - pos[i]) >= (minLead-1) );
431 0 : hit = hit && ((n - i - 1 + sal::static_int_cast< sal_sSize >(strlen(rep[i])) - leftrep - 1) >= minTrail);
432 : }
433 0 : if (hit)
434 : {
435 0 : nHyphenationPos = i;
436 0 : if (rep && (i < n) && rep[i])
437 : {
438 0 : nHyphenationPosAlt = i - pos[i];
439 0 : nHyphenationPosAltHyph = i + leftrep - pos[i];
440 : }
441 : }
442 : }
443 :
444 0 : if (nHyphenationPos == -1)
445 : {
446 0 : xRes = NULL;
447 : }
448 : else
449 : {
450 0 : if (rep && rep[nHyphenationPos])
451 : {
452 : // remove equal sign
453 0 : char * s = rep[nHyphenationPos];
454 0 : int eq = 0;
455 0 : for (; *s; s++)
456 : {
457 0 : if (*s == '=') eq = 1;
458 0 : if (eq) *s = *(s + 1);
459 : }
460 0 : OUString repHyphlow(rep[nHyphenationPos], strlen(rep[nHyphenationPos]), eEnc);
461 0 : OUString repHyph;
462 0 : switch (ct)
463 : {
464 : case CAPTYPE_ALLCAP:
465 : {
466 0 : repHyph = makeUpperCase(repHyphlow, pCC);
467 0 : break;
468 : }
469 : case CAPTYPE_INITCAP:
470 : {
471 0 : if (nHyphenationPosAlt == -1)
472 0 : repHyph = makeInitCap(repHyphlow, pCC);
473 : else
474 0 : repHyph = repHyphlow;
475 0 : break;
476 : }
477 : default:
478 : {
479 0 : repHyph = repHyphlow;
480 0 : break;
481 : }
482 : }
483 :
484 : // handle shortening
485 : sal_Int16 nPos = (sal_Int16) ((nHyphenationPosAltHyph < nHyphenationPos) ?
486 0 : nHyphenationPosAltHyph : nHyphenationPos);
487 : // dicretionary hyphenation
488 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LanguageTag( aLocale ).getLanguageType(), nPos,
489 0 : aWord.replaceAt(nHyphenationPosAlt + 1, cut[nHyphenationPos], repHyph),
490 0 : (sal_Int16) nHyphenationPosAltHyph);
491 : }
492 : else
493 : {
494 0 : xRes = HyphenatedWord::CreateHyphenatedWord( aWord, LanguageTag( aLocale ).getLanguageType(),
495 0 : (sal_Int16)nHyphenationPos, aWord, (sal_Int16) nHyphenationPos);
496 : }
497 : }
498 :
499 0 : delete[] lcword;
500 0 : delete[] hyphens;
501 0 : if (rep)
502 : {
503 0 : for(int j = 0; j < n; j++)
504 : {
505 0 : if (rep[j]) free(rep[j]);
506 : }
507 0 : free(rep);
508 : }
509 0 : if (pos) free(pos);
510 0 : if (cut) free(cut);
511 0 : return xRes;
512 : }
513 0 : return NULL;
514 : }
515 :
516 :
517 0 : Reference < XHyphenatedWord > SAL_CALL Hyphenator::queryAlternativeSpelling(
518 : const ::rtl::OUString& /*aWord*/,
519 : const ::com::sun::star::lang::Locale& /*aLocale*/,
520 : sal_Int16 /*nIndex*/,
521 : const ::com::sun::star::beans::PropertyValues& /*aProperties*/ )
522 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
523 : {
524 : /* alternative spelling isn't supported by tex dictionaries */
525 : /* XXX: OOo's extended libhjn algorithm can support alternative spellings with extended TeX dic. */
526 : /* TASK: implement queryAlternativeSpelling() */
527 0 : return NULL;
528 : }
529 :
530 0 : Reference< XPossibleHyphens > SAL_CALL Hyphenator::createPossibleHyphens( const ::rtl::OUString& aWord,
531 : const ::com::sun::star::lang::Locale& aLocale,
532 : const ::com::sun::star::beans::PropertyValues& aProperties )
533 : throw(::com::sun::star::lang::IllegalArgumentException, ::com::sun::star::uno::RuntimeException)
534 : {
535 0 : PropertyHelper_Hyphenation& rHelper = GetPropHelper();
536 0 : rHelper.SetTmpPropVals(aProperties);
537 0 : sal_Int16 minTrail = rHelper.GetMinTrailing();
538 0 : sal_Int16 minLead = rHelper.GetMinLeading();
539 0 : sal_Int16 minLen = rHelper.GetMinWordLength();
540 :
541 : //Resolves: fdo#41083 honour MinWordLength in "createPossibleHyphens" as
542 : //well as "hyphenate"
543 0 : if (aWord.getLength() < minLen)
544 : {
545 0 : return PossibleHyphens::CreatePossibleHyphens( aWord, LanguageTag( aLocale ).getLanguageType(),
546 0 : aWord, Sequence< sal_Int16 >() );
547 : }
548 :
549 0 : int k = -1;
550 0 : for (int j = 0; j < numdict; j++)
551 : {
552 0 : if (aLocale == aDicts[j].aLoc) k = j;
553 : }
554 :
555 : // if we have a hyphenation dictionary matching this locale
556 0 : if (k != -1)
557 : {
558 0 : HyphenDict *dict = NULL;
559 : // if this dictioanry has not been loaded yet do that
560 0 : if (!aDicts[k].aPtr)
561 : {
562 0 : OUString DictFN = aDicts[k].aName + A2OU(".dic");
563 0 : OUString dictpath;
564 :
565 0 : osl::FileBase::getSystemPathFromFileURL( DictFN, dictpath );
566 0 : OString sTmp( OU2ENC( dictpath, osl_getThreadTextEncoding() ) );
567 :
568 : #if defined(WNT)
569 : // workaround for Windows specifc problem that the
570 : // path length in calls to 'fopen' is limted to somewhat
571 : // about 120+ characters which will usually be exceed when
572 : // using dictionaries as extensions.
573 : sTmp = Win_GetShortPathName( dictpath );
574 : #endif
575 :
576 0 : if ( ( dict = hnj_hyphen_load ( sTmp.getStr()) ) == NULL )
577 : {
578 0 : fprintf(stderr, "Couldn't find file %s and %s\n", sTmp.getStr(), OU2ENC(dictpath, osl_getThreadTextEncoding()) );
579 0 : return NULL;
580 : }
581 0 : aDicts[k].aPtr = dict;
582 0 : aDicts[k].eEnc = getTextEncodingFromCharset(dict->cset);
583 : }
584 :
585 : // other wise hyphenate the word with that dictionary
586 0 : dict = aDicts[k].aPtr;
587 0 : rtl_TextEncoding eEnc = aDicts[k].eEnc;
588 0 : CharClass* pCC = aDicts[k].apCC;
589 :
590 : // we don't want to work with a default text encoding since following incorrect
591 : // results may occur only for specific text and thus may be hard to notice.
592 : // Thus better always make a clean exit here if the text encoding is in question.
593 : // Hopefully something not working at all will raise proper attention quickly. ;-)
594 : DBG_ASSERT( eEnc != RTL_TEXTENCODING_DONTKNOW, "failed to get text encoding! (maybe incorrect encoding string in file)" );
595 0 : if (eEnc == RTL_TEXTENCODING_DONTKNOW)
596 0 : return NULL;
597 :
598 : // first handle smart quotes both single and double
599 0 : OUStringBuffer rBuf(aWord);
600 0 : sal_Int32 nc = rBuf.getLength();
601 : sal_Unicode ch;
602 0 : for (sal_Int32 ix=0; ix < nc; ix++)
603 : {
604 0 : ch = rBuf[ix];
605 0 : if ((ch == 0x201C) || (ch == 0x201D))
606 0 : rBuf[ix] = (sal_Unicode)0x0022;
607 0 : if ((ch == 0x2018) || (ch == 0x2019))
608 0 : rBuf[ix] = (sal_Unicode)0x0027;
609 : }
610 0 : OUString nWord(rBuf.makeStringAndClear());
611 :
612 : // now convert word to all lowercase for pattern recognition
613 0 : OUString nTerm(makeLowerCase(nWord, pCC));
614 :
615 : // now convert word to needed encoding
616 0 : OString encWord(OU2ENC(nTerm,eEnc));
617 :
618 0 : int wordlen = encWord.getLength();
619 0 : char *lcword = new char[wordlen+1];
620 0 : char *hyphens = new char[wordlen+5];
621 0 : char ** rep = NULL; // replacements of discretionary hyphenation
622 0 : int * pos = NULL; // array of [hyphenation point] minus [deletion position]
623 0 : int * cut = NULL; // length of deletions in original word
624 :
625 : // copy converted word into simple char buffer
626 0 : strcpy(lcword,encWord.getStr());
627 :
628 : // first remove any trailing periods
629 0 : int n = wordlen-1;
630 0 : while((n >=0) && (lcword[n] == '.'))
631 0 : n--;
632 0 : n++;
633 0 : if (n > 0)
634 : {
635 : const bool bFailed = 0 != hnj_hyphen_hyphenate3(dict, lcword, n, hyphens, NULL,
636 : &rep, &pos, &cut, minLead, minTrail,
637 : Max(dict->clhmin, Max(dict->clhmin, 2) + Max(0, minLead - Max(dict->lhmin, 2))),
638 0 : Max(dict->crhmin, Max(dict->crhmin, 2) + Max(0, minTrail - Max(dict->rhmin, 2))) );
639 0 : if (bFailed)
640 : {
641 0 : delete[] hyphens;
642 0 : delete[] lcword;
643 :
644 0 : if (rep)
645 : {
646 0 : for(int j = 0; j < n; j++)
647 : {
648 0 : if (rep[j]) free(rep[j]);
649 : }
650 0 : free(rep);
651 : }
652 0 : if (pos) free(pos);
653 0 : if (cut) free(cut);
654 :
655 0 : return NULL;
656 : }
657 : }
658 : // now backfill hyphens[] for any removed periods
659 0 : for (int c = n; c < wordlen; c++)
660 0 : hyphens[c] = '0';
661 0 : hyphens[wordlen] = '\0';
662 :
663 0 : sal_Int16 nHyphCount = 0;
664 : sal_Int16 i;
665 :
666 0 : for ( i = 0; i < encWord.getLength(); i++)
667 : {
668 0 : if (hyphens[i]&1 && (!rep || !rep[i]))
669 0 : nHyphCount++;
670 : }
671 :
672 0 : Sequence< sal_Int16 > aHyphPos(nHyphCount);
673 0 : sal_Int16 *pPos = aHyphPos.getArray();
674 0 : OUStringBuffer hyphenatedWordBuffer;
675 0 : nHyphCount = 0;
676 :
677 0 : for (i = 0; i < nWord.getLength(); i++)
678 : {
679 0 : hyphenatedWordBuffer.append(aWord[i]);
680 : // hyphenation position (not alternative)
681 0 : if (hyphens[i]&1 && (!rep || !rep[i]))
682 : {
683 0 : pPos[nHyphCount] = i;
684 0 : hyphenatedWordBuffer.append(sal_Unicode('='));
685 0 : nHyphCount++;
686 : }
687 : }
688 :
689 0 : OUString hyphenatedWord = hyphenatedWordBuffer.makeStringAndClear();
690 :
691 : Reference< XPossibleHyphens > xRes = PossibleHyphens::CreatePossibleHyphens(
692 0 : aWord, LanguageTag( aLocale ).getLanguageType(), hyphenatedWord, aHyphPos);
693 :
694 0 : delete[] hyphens;
695 0 : delete[] lcword;
696 :
697 0 : if (rep)
698 : {
699 0 : for(int j = 0; j < n; j++)
700 : {
701 0 : if (rep[j]) free(rep[j]);
702 : }
703 0 : free(rep);
704 : }
705 0 : if (pos) free(pos);
706 0 : if (cut) free(cut);
707 :
708 0 : return xRes;
709 : }
710 :
711 0 : return NULL;
712 : }
713 :
714 0 : sal_uInt16 SAL_CALL Hyphenator::capitalType(const OUString& aTerm, CharClass * pCC)
715 : {
716 0 : sal_Int32 tlen = aTerm.getLength();
717 0 : if ((pCC) && (tlen))
718 : {
719 0 : String aStr(aTerm);
720 0 : sal_Int32 nc = 0;
721 0 : for (xub_StrLen tindex = 0; tindex < tlen; tindex++)
722 : {
723 0 : if (pCC->getCharacterType(aStr,tindex) & ::com::sun::star::i18n::KCharacterType::UPPER)
724 0 : nc++;
725 : }
726 :
727 0 : if (nc == 0)
728 0 : return (sal_uInt16) CAPTYPE_NOCAP;
729 0 : if (nc == tlen)
730 0 : return (sal_uInt16) CAPTYPE_ALLCAP;
731 0 : if ((nc == 1) && (pCC->getCharacterType(aStr,0) & ::com::sun::star::i18n::KCharacterType::UPPER))
732 0 : return (sal_uInt16) CAPTYPE_INITCAP;
733 :
734 0 : return (sal_uInt16) CAPTYPE_MIXED;
735 : }
736 0 : return (sal_uInt16) CAPTYPE_UNKNOWN;
737 : }
738 :
739 0 : OUString SAL_CALL Hyphenator::makeLowerCase(const OUString& aTerm, CharClass * pCC)
740 : {
741 0 : if (pCC)
742 0 : return pCC->lowercase(aTerm);
743 0 : return aTerm;
744 : }
745 :
746 0 : OUString SAL_CALL Hyphenator::makeUpperCase(const OUString& aTerm, CharClass * pCC)
747 : {
748 0 : if (pCC)
749 0 : return pCC->uppercase(aTerm);
750 0 : return aTerm;
751 : }
752 :
753 :
754 0 : OUString SAL_CALL Hyphenator::makeInitCap(const OUString& aTerm, CharClass * pCC)
755 : {
756 0 : sal_Int32 tlen = aTerm.getLength();
757 0 : if ((pCC) && (tlen))
758 : {
759 0 : OUString bTemp = aTerm.copy(0,1);
760 0 : if (tlen > 1)
761 0 : return ( pCC->uppercase(bTemp, 0, 1) + pCC->lowercase(aTerm,1,(tlen-1)) );
762 :
763 0 : return pCC->uppercase(bTemp, 0, 1);
764 : }
765 0 : return aTerm;
766 : }
767 :
768 :
769 1 : Reference< XInterface > SAL_CALL Hyphenator_CreateInstance(
770 : const Reference< XMultiServiceFactory > & /*rSMgr*/ )
771 : throw(Exception)
772 : {
773 1 : Reference< XInterface > xService = (cppu::OWeakObject*) new Hyphenator;
774 1 : return xService;
775 : }
776 :
777 :
778 0 : sal_Bool SAL_CALL Hyphenator::addLinguServiceEventListener(
779 : const Reference< XLinguServiceEventListener >& rxLstnr )
780 : throw(RuntimeException)
781 : {
782 0 : MutexGuard aGuard( GetLinguMutex() );
783 :
784 0 : sal_Bool bRes = sal_False;
785 0 : if (!bDisposing && rxLstnr.is())
786 : {
787 0 : bRes = GetPropHelper().addLinguServiceEventListener( rxLstnr );
788 : }
789 0 : return bRes;
790 : }
791 :
792 :
793 0 : sal_Bool SAL_CALL Hyphenator::removeLinguServiceEventListener(
794 : const Reference< XLinguServiceEventListener >& rxLstnr )
795 : throw(RuntimeException)
796 : {
797 0 : MutexGuard aGuard( GetLinguMutex() );
798 :
799 0 : sal_Bool bRes = sal_False;
800 0 : if (!bDisposing && rxLstnr.is())
801 : {
802 0 : bRes = GetPropHelper().removeLinguServiceEventListener( rxLstnr );
803 : }
804 0 : return bRes;
805 : }
806 :
807 :
808 0 : OUString SAL_CALL Hyphenator::getServiceDisplayName( const Locale& /*rLocale*/ )
809 : throw(RuntimeException)
810 : {
811 0 : MutexGuard aGuard( GetLinguMutex() );
812 0 : return A2OU( "Libhyphen Hyphenator" );
813 : }
814 :
815 :
816 0 : void SAL_CALL Hyphenator::initialize( const Sequence< Any >& rArguments )
817 : throw(Exception, RuntimeException)
818 : {
819 0 : MutexGuard aGuard( GetLinguMutex() );
820 :
821 0 : if (!pPropHelper)
822 : {
823 0 : sal_Int32 nLen = rArguments.getLength();
824 0 : if (2 == nLen)
825 : {
826 0 : Reference< XPropertySet > xPropSet;
827 0 : rArguments.getConstArray()[0] >>= xPropSet;
828 : //rArguments.getConstArray()[1] >>= xDicList;
829 :
830 : //! Pointer allows for access of the non-UNO functions.
831 : //! And the reference to the UNO-functions while increasing
832 : //! the ref-count and will implicitly free the memory
833 : //! when the object is not longer used.
834 0 : pPropHelper = new PropertyHelper_Hyphenation( (XHyphenator *) this, xPropSet );
835 0 : pPropHelper->AddAsPropListener(); //! after a reference is established
836 : }
837 : else {
838 : OSL_FAIL( "wrong number of arguments in sequence" );
839 : }
840 0 : }
841 0 : }
842 :
843 :
844 1 : void SAL_CALL Hyphenator::dispose()
845 : throw(RuntimeException)
846 : {
847 1 : MutexGuard aGuard( GetLinguMutex() );
848 :
849 1 : if (!bDisposing)
850 : {
851 1 : bDisposing = true;
852 1 : EventObject aEvtObj( (XHyphenator *) this );
853 1 : aEvtListeners.disposeAndClear( aEvtObj );
854 1 : if (pPropHelper)
855 : {
856 0 : pPropHelper->RemoveAsPropListener();
857 0 : delete pPropHelper;
858 0 : pPropHelper = NULL;
859 1 : }
860 1 : }
861 1 : }
862 :
863 :
864 0 : void SAL_CALL Hyphenator::addEventListener( const Reference< XEventListener >& rxListener )
865 : throw(RuntimeException)
866 : {
867 0 : MutexGuard aGuard( GetLinguMutex() );
868 :
869 0 : if (!bDisposing && rxListener.is())
870 0 : aEvtListeners.addInterface( rxListener );
871 0 : }
872 :
873 :
874 0 : void SAL_CALL Hyphenator::removeEventListener( const Reference< XEventListener >& rxListener )
875 : throw(RuntimeException)
876 : {
877 0 : MutexGuard aGuard( GetLinguMutex() );
878 :
879 0 : if (!bDisposing && rxListener.is())
880 0 : aEvtListeners.removeInterface( rxListener );
881 0 : }
882 :
883 :
884 : ///////////////////////////////////////////////////////////////////////////
885 : // Service specific part
886 : //
887 :
888 1 : OUString SAL_CALL Hyphenator::getImplementationName()
889 : throw(RuntimeException)
890 : {
891 1 : MutexGuard aGuard( GetLinguMutex() );
892 :
893 1 : return getImplementationName_Static();
894 : }
895 :
896 :
897 0 : sal_Bool SAL_CALL Hyphenator::supportsService( const OUString& ServiceName )
898 : throw(RuntimeException)
899 : {
900 0 : MutexGuard aGuard( GetLinguMutex() );
901 :
902 0 : Sequence< OUString > aSNL = getSupportedServiceNames();
903 0 : const OUString * pArray = aSNL.getConstArray();
904 0 : for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
905 0 : if( pArray[i] == ServiceName )
906 0 : return sal_True;
907 0 : return sal_False;
908 : }
909 :
910 :
911 0 : Sequence< OUString > SAL_CALL Hyphenator::getSupportedServiceNames()
912 : throw(RuntimeException)
913 : {
914 0 : MutexGuard aGuard( GetLinguMutex() );
915 :
916 0 : return getSupportedServiceNames_Static();
917 : }
918 :
919 :
920 1 : Sequence< OUString > Hyphenator::getSupportedServiceNames_Static()
921 : throw()
922 : {
923 1 : MutexGuard aGuard( GetLinguMutex() );
924 :
925 1 : Sequence< OUString > aSNS( 1 ); // auch mehr als 1 Service moeglich
926 1 : aSNS.getArray()[0] = A2OU( SN_HYPHENATOR );
927 1 : return aSNS;
928 : }
929 :
930 1 : void * SAL_CALL Hyphenator_getFactory( const sal_Char * pImplName,
931 : XMultiServiceFactory * pServiceManager, void * )
932 : {
933 1 : void * pRet = 0;
934 1 : if ( !Hyphenator::getImplementationName_Static().compareToAscii( pImplName ) )
935 : {
936 : Reference< XSingleServiceFactory > xFactory =
937 : cppu::createOneInstanceFactory(
938 : pServiceManager,
939 : Hyphenator::getImplementationName_Static(),
940 : Hyphenator_CreateInstance,
941 1 : Hyphenator::getSupportedServiceNames_Static());
942 : // acquire, because we return an interface pointer instead of a reference
943 1 : xFactory->acquire();
944 1 : pRet = xFactory.get();
945 : }
946 1 : return pRet;
947 : }
948 :
949 :
950 : ///////////////////////////////////////////////////////////////////////////
951 :
952 : #undef CAPTYPE_UNKNOWN
953 : #undef CAPTYPE_NOCAP
954 : #undef CAPTYPE_INITCAP
955 : #undef CAPTYPE_ALLCAP
956 : #undef CAPTYPE_MIXED
957 :
958 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|