Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include "i18npool/languagetag.hxx"
11 : #include "i18npool/mslangid.hxx"
12 : #include <rtl/ustrbuf.hxx>
13 : #include <rtl/bootstrap.hxx>
14 : #include <osl/file.hxx>
15 : #include <rtl/instance.hxx>
16 : #include <rtl/locale.h>
17 :
18 : //#define erDEBUG
19 :
20 : #if defined(ENABLE_LIBLANGTAG)
21 : #include <liblangtag/langtag.h>
22 : #else
23 : /* Replacement code for LGPL phobic and Android systems.
24 : * For iOS we could probably use NSLocale instead, that should have more or
25 : * less required functionality. If it is good enough, it could be used for Mac
26 : * OS X, too.
27 : */
28 : #include "simple-langtag.cxx"
29 : #endif
30 :
31 : using rtl::OUString;
32 : using rtl::OString;
33 : using rtl::OUStringBuffer;
34 : using namespace com::sun::star;
35 :
// The actual pointer type of mpImplLangtag that is declared void* to not
// pollute the entire code base with liblangtag.
// LANGTAGCAST(p) converts such an opaque pointer back to lt_tag_t*.
#define LANGTAGCAST(p) (reinterpret_cast<lt_tag_t*>(p))
// MPLANGTAG is shorthand for the current object's mpImplLangtag as lt_tag_t*;
// it is only meaningful where a member named mpImplLangtag is in scope.
#define MPLANGTAG LANGTAGCAST(mpImplLangtag)
40 :
/** Convention to signal the presence of a BCP 47 language tag in a Locale's
    Variant field. The Locale's Language field then contains this code, which
    ISO 639-2 reserves for local use. */
#define ISO639_LANGUAGE_TAG "qlt"
45 :
46 :
// "statics" to be returned as const reference to an empty locale and string.
// They are handed out by getLocale() and getBcp47() when the system locale is
// not to be resolved.
namespace {
struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
}
52 :
53 :
54 : /** A reference holder for liblangtag data de/initialization, one static
55 : instance. Currently implemented such that the first "ref" inits and dtor
56 : (our library deinitialized) tears down.
57 : */
58 : class LiblantagDataRef
59 : {
60 : public:
61 : LiblantagDataRef();
62 : ~LiblantagDataRef();
63 10 : inline void incRef()
64 : {
65 10 : if (mnRef != SAL_MAX_UINT32 && !mnRef++)
66 2 : setup();
67 10 : }
68 12 : inline void decRef()
69 : {
70 12 : if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
71 2 : teardown();
72 12 : }
73 : private:
74 : rtl::OString maDataPath; // path to liblangtag data, "|" if system
75 : sal_uInt32 mnRef;
76 :
77 : void setupDataPath();
78 : void setup();
79 : void teardown();
80 : };
81 :
// The single LiblantagDataRef instance, accessed through theDataRef::get().
namespace {
struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
}
85 :
86 2 : LiblantagDataRef::LiblantagDataRef()
87 : :
88 2 : mnRef(0)
89 : {
90 2 : }
91 :
92 4 : LiblantagDataRef::~LiblantagDataRef()
93 : {
94 : // When destructed we're tearing down unconditionally.
95 2 : if (mnRef)
96 2 : mnRef = 1;
97 2 : decRef();
98 2 : }
99 :
100 2 : void LiblantagDataRef::setup()
101 : {
102 : SAL_INFO( "i18npool.langtag", "LiblantagDataRef::setup: initializing database");
103 2 : if (maDataPath.isEmpty())
104 2 : setupDataPath();
105 2 : lt_db_initialize();
106 : // Hold ref eternally.
107 2 : mnRef = SAL_MAX_UINT32;
108 2 : }
109 :
110 2 : void LiblantagDataRef::teardown()
111 : {
112 : SAL_INFO( "i18npool.langtag", "LiblantagDataRef::teardown: finalizing database");
113 2 : lt_db_finalize();
114 2 : }
115 :
116 2 : void LiblantagDataRef::setupDataPath()
117 : {
118 : // maDataPath is assumed to be empty here.
119 2 : OUString aURL("$BRAND_BASE_DIR/share/liblangtag");
120 2 : rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
121 :
122 : // Check if data is in our own installation, else assume system
123 : // installation.
124 2 : OUString aData( aURL);
125 2 : aData += "/language-subtag-registry.xml";
126 2 : osl::DirectoryItem aDirItem;
127 2 : if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
128 : {
129 2 : OUString aPath;
130 2 : if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
131 2 : maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
132 : }
133 2 : if (maDataPath.isEmpty())
134 0 : maDataPath = "|"; // assume system
135 : else
136 2 : lt_db_set_datadir( maDataPath.getStr());
137 2 : }
138 :
139 507 : LanguageTag::LanguageTag( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize )
140 : :
141 : maBcp47( rBcp47LanguageTag),
142 : mpImplLangtag( NULL),
143 : mnLangID( LANGUAGE_DONTKNOW),
144 : meIsValid( DECISION_DONTKNOW),
145 : meIsIsoLocale( DECISION_DONTKNOW),
146 : meIsIsoODF( DECISION_DONTKNOW),
147 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
148 507 : mbSystemLocale( rBcp47LanguageTag.isEmpty()),
149 507 : mbInitializedBcp47( !mbSystemLocale),
150 : mbInitializedLocale( false),
151 : mbInitializedLangID( false),
152 : mbCachedLanguage( false),
153 : mbCachedScript( false),
154 : mbCachedCountry( false),
155 1521 : mbIsFallback( false)
156 : {
157 507 : if (bCanonicalize)
158 6 : canonicalize();
159 507 : }
160 :
161 :
162 20554 : LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
163 : :
164 : maLocale( rLocale),
165 : mpImplLangtag( NULL),
166 : mnLangID( LANGUAGE_DONTKNOW),
167 : meIsValid( DECISION_DONTKNOW),
168 : meIsIsoLocale( DECISION_DONTKNOW),
169 : meIsIsoODF( DECISION_DONTKNOW),
170 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
171 20554 : mbSystemLocale( rLocale.Language.isEmpty()),
172 : mbInitializedBcp47( false),
173 20554 : mbInitializedLocale( !mbSystemLocale),
174 : mbInitializedLangID( false),
175 : mbCachedLanguage( false),
176 : mbCachedScript( false),
177 : mbCachedCountry( false),
178 61662 : mbIsFallback( false)
179 : {
180 20554 : }
181 :
182 :
183 36523 : LanguageTag::LanguageTag( LanguageType nLanguage )
184 : :
185 : mpImplLangtag( NULL),
186 : mnLangID( nLanguage),
187 : meIsValid( DECISION_DONTKNOW),
188 : meIsIsoLocale( DECISION_DONTKNOW),
189 : meIsIsoODF( DECISION_DONTKNOW),
190 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
191 : mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
192 : mbInitializedBcp47( false),
193 : mbInitializedLocale( false),
194 36523 : mbInitializedLangID( !mbSystemLocale),
195 : mbCachedLanguage( false),
196 : mbCachedScript( false),
197 : mbCachedCountry( false),
198 73046 : mbIsFallback( false)
199 : {
200 36523 : }
201 :
202 :
203 94 : LanguageTag::LanguageTag( const rtl::OUString& rLanguage, const rtl::OUString& rCountry )
204 : :
205 : maLocale( rLanguage, rCountry, ""),
206 : mpImplLangtag( NULL),
207 : mnLangID( LANGUAGE_DONTKNOW),
208 : meIsValid( DECISION_DONTKNOW),
209 : meIsIsoLocale( DECISION_DONTKNOW),
210 : meIsIsoODF( DECISION_DONTKNOW),
211 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
212 94 : mbSystemLocale( rLanguage.isEmpty()),
213 : mbInitializedBcp47( false),
214 94 : mbInitializedLocale( !mbSystemLocale),
215 : mbInitializedLangID( false),
216 : mbCachedLanguage( false),
217 : mbCachedScript( false),
218 : mbCachedCountry( false),
219 282 : mbIsFallback( false)
220 : {
221 94 : }
222 :
223 :
224 3160 : LanguageTag::LanguageTag( const rtl_Locale & rLocale )
225 : :
226 : maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
227 : mpImplLangtag( NULL),
228 : mnLangID( LANGUAGE_DONTKNOW),
229 : meIsValid( DECISION_DONTKNOW),
230 : meIsIsoLocale( DECISION_DONTKNOW),
231 : meIsIsoODF( DECISION_DONTKNOW),
232 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
233 3160 : mbSystemLocale( maLocale.Language.isEmpty()),
234 : mbInitializedBcp47( false),
235 3160 : mbInitializedLocale( !mbSystemLocale),
236 : mbInitializedLangID( false),
237 : mbCachedLanguage( false),
238 : mbCachedScript( false),
239 : mbCachedCountry( false),
240 9480 : mbIsFallback( false)
241 : {
242 3160 : }
243 :
244 :
245 29750 : LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
246 : :
247 : maLocale( rLanguageTag.maLocale),
248 : maBcp47( rLanguageTag.maBcp47),
249 : maCachedLanguage( rLanguageTag.maCachedLanguage),
250 : maCachedScript( rLanguageTag.maCachedScript),
251 : maCachedCountry( rLanguageTag.maCachedCountry),
252 : mpImplLangtag( rLanguageTag.mpImplLangtag ?
253 0 : lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL),
254 : mnLangID( rLanguageTag.mnLangID),
255 : meIsValid( rLanguageTag.meIsValid),
256 : meIsIsoLocale( rLanguageTag.meIsIsoLocale),
257 : meIsIsoODF( rLanguageTag.meIsIsoODF),
258 : meIsLiblangtagNeeded( rLanguageTag.meIsLiblangtagNeeded),
259 : mbSystemLocale( rLanguageTag.mbSystemLocale),
260 : mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
261 : mbInitializedLocale( rLanguageTag.mbInitializedLocale),
262 : mbInitializedLangID( rLanguageTag.mbInitializedLangID),
263 : mbCachedLanguage( rLanguageTag.mbCachedLanguage),
264 : mbCachedScript( rLanguageTag.mbCachedScript),
265 : mbCachedCountry( rLanguageTag.mbCachedCountry),
266 29750 : mbIsFallback( rLanguageTag.mbIsFallback)
267 : {
268 29750 : if (mpImplLangtag)
269 0 : theDataRef::get().incRef();
270 29750 : }
271 :
272 :
273 14005 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
274 : {
275 14005 : maLocale = rLanguageTag.maLocale;
276 14005 : maBcp47 = rLanguageTag.maBcp47;
277 14005 : maCachedLanguage = rLanguageTag.maCachedLanguage;
278 14005 : maCachedScript = rLanguageTag.maCachedScript;
279 14005 : maCachedCountry = rLanguageTag.maCachedCountry;
280 14005 : mpImplLangtag = rLanguageTag.mpImplLangtag;
281 : mpImplLangtag = rLanguageTag.mpImplLangtag ?
282 14005 : lt_tag_copy( LANGTAGCAST( rLanguageTag.mpImplLangtag)) : NULL;
283 14005 : mnLangID = rLanguageTag.mnLangID;
284 14005 : meIsValid = rLanguageTag.meIsValid;
285 14005 : meIsIsoLocale = rLanguageTag.meIsIsoLocale;
286 14005 : meIsIsoODF = rLanguageTag.meIsIsoODF;
287 14005 : meIsLiblangtagNeeded= rLanguageTag.meIsLiblangtagNeeded;
288 14005 : mbSystemLocale = rLanguageTag.mbSystemLocale;
289 14005 : mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
290 14005 : mbInitializedLocale = rLanguageTag.mbInitializedLocale;
291 14005 : mbInitializedLangID = rLanguageTag.mbInitializedLangID;
292 14005 : mbCachedLanguage = rLanguageTag.mbCachedLanguage;
293 14005 : mbCachedScript = rLanguageTag.mbCachedScript;
294 14005 : mbCachedCountry = rLanguageTag.mbCachedCountry;
295 14005 : mbIsFallback = rLanguageTag.mbIsFallback;
296 14005 : if (mpImplLangtag)
297 0 : theDataRef::get().incRef();
298 14005 : return *this;
299 : }
300 :
301 :
302 175016 : LanguageTag::~LanguageTag()
303 : {
304 87508 : if (mpImplLangtag)
305 : {
306 10 : lt_tag_unref( MPLANGTAG);
307 10 : theDataRef::get().decRef();
308 : }
309 87508 : }
310 :
311 :
312 4493 : void LanguageTag::resetVars()
313 : {
314 4493 : if (mpImplLangtag)
315 : {
316 0 : lt_tag_unref( MPLANGTAG);
317 0 : mpImplLangtag = NULL;
318 0 : theDataRef::get().decRef();
319 : }
320 :
321 4493 : maLocale = lang::Locale();
322 4493 : if (!maBcp47.isEmpty())
323 6 : maBcp47 = OUString();
324 4493 : if (!maCachedLanguage.isEmpty())
325 5 : maCachedLanguage= OUString();
326 4493 : if (!maCachedScript.isEmpty())
327 0 : maCachedScript = OUString();
328 4493 : if (!maCachedCountry.isEmpty())
329 5 : maCachedCountry = OUString();
330 4493 : mnLangID = LANGUAGE_DONTKNOW;
331 4493 : meIsValid = DECISION_DONTKNOW;
332 4493 : meIsIsoLocale = DECISION_DONTKNOW;
333 4493 : meIsIsoODF = DECISION_DONTKNOW;
334 4493 : meIsLiblangtagNeeded= DECISION_DONTKNOW;
335 4493 : mbSystemLocale = true;
336 4493 : mbInitializedBcp47 = false;
337 4493 : mbInitializedLocale = false;
338 4493 : mbInitializedLangID = false;
339 4493 : mbCachedLanguage = false;
340 4493 : mbCachedScript = false;
341 4493 : mbCachedCountry = false;
342 4493 : mbIsFallback = false;
343 4493 : }
344 :
345 :
346 63 : void LanguageTag::reset( const rtl::OUString & rBcp47LanguageTag, bool bCanonicalize )
347 : {
348 63 : resetVars();
349 63 : maBcp47 = rBcp47LanguageTag;
350 63 : mbSystemLocale = rBcp47LanguageTag.isEmpty();
351 63 : mbInitializedBcp47 = !mbSystemLocale;
352 :
353 63 : if (bCanonicalize)
354 0 : canonicalize();
355 63 : }
356 :
357 :
358 1 : void LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
359 : {
360 1 : resetVars();
361 1 : maLocale = rLocale;
362 1 : mbSystemLocale = rLocale.Language.isEmpty();
363 1 : mbInitializedLocale = !mbSystemLocale;
364 1 : }
365 :
366 :
367 4429 : void LanguageTag::reset( LanguageType nLanguage )
368 : {
369 4429 : resetVars();
370 4429 : mnLangID = nLanguage;
371 4429 : mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
372 4429 : mbInitializedLangID = !mbSystemLocale;
373 4429 : }
374 :
375 :
/** Determine validity of the language tag and canonicalize it if needed.

    First decides (once, while meIsLiblangtagNeeded is DECISION_DONTKNOW)
    whether liblangtag has to be consulted at all: a locale without Variant,
    or one that maps to a known MS-LangID, is accepted as valid without it.
    Otherwise the tag is parsed and canonicalized through liblangtag and
    maBcp47 is replaced by the canonical form.

    Sets meIsLiblangtagNeeded and meIsValid.

    @return true if the tag is considered valid, false if liblangtag could
            not parse, canonicalize or reparse it.
 */
bool LanguageTag::canonicalize()
{
#ifdef erDEBUG
    // dump once
    // The helper remembers the address of mpImplLangtag only if it was NULL on
    // entry and dumps the tag on scope exit if one got created meanwhile.
    struct dumper
    {
        void** mpp;
        dumper( void** pp ) : mpp( *pp ? NULL : pp) {}
        ~dumper() { if (mpp && *mpp) lt_tag_dump( LANGTAGCAST( *mpp)); }
    };
    dumper aDumper( &mpImplLangtag);
#endif

    // Side effect: have maBcp47 in any case, resolved system.
    // Some methods calling canonicalize() (or not calling it due to
    // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
    // meIsLiblangtagNeeded anywhere else than hereafter.
    getBcp47( true );

    // The simple cases and known locales don't need liblangtag processing,
    // which also avoids loading liblangtag data on startup.
    if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
    {
        // Track state that is only derived for this check and has to be
        // rolled back before leaving the block.
        bool bTemporaryLocale = false;
        bool bTemporaryLangID = false;
        if (!mbInitializedLocale && !mbInitializedLangID)
        {
            if (mbSystemLocale)
            {
                mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
                mbInitializedLangID = true;
            }
            else
            {
                // Now this is getting funny.. we only have some BCP47 string
                // and want to determine if parsing it would be possible
                // without using liblangtag just to see if it is a simple known
                // locale.
                OUString aLanguage, aScript, aCountry;
                if (simpleExtract( maBcp47, aLanguage, aScript, aCountry))
                {
                    if (aScript.isEmpty())
                    {
                        maLocale.Language = aLanguage;
                        maLocale.Country = aCountry;
                    }
                    else
                    {
                        // With a script the full tag goes into Variant and
                        // Language carries the "qlt" marker.
                        maLocale.Language = ISO639_LANGUAGE_TAG;
                        maLocale.Country = aCountry;
                        maLocale.Variant = maBcp47;
                    }
                    bTemporaryLocale = mbInitializedLocale = true;
                }
            }
        }
        if (mbInitializedLangID && !mbInitializedLocale)
        {
            // Do not call getLocale() here because that prefers
            // convertBcp47ToLocale() which would end up in recursion via
            // isIsoLocale()!

            // Prepare to verify that we have a known locale, not just an
            // arbitrary MS-LangID.
            convertLangToLocale();
        }
        if (mbInitializedLocale)
        {
            if (maLocale.Variant.isEmpty())
                meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC]
            else
            {
                if (!mbInitializedLangID)
                {
                    convertLocaleToLang();
                    // A LangID derived from a temporary locale is temporary too.
                    if (bTemporaryLocale)
                        bTemporaryLangID = true;
                }
                if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
                    meIsLiblangtagNeeded = DECISION_NO; // known locale
            }
        }
        // Undo the state that was set up only for the checks above.
        if (bTemporaryLocale)
        {
            mbInitializedLocale = false;
            maLocale = lang::Locale();
        }
        if (bTemporaryLangID)
        {
            mbInitializedLangID = false;
            mnLangID = LANGUAGE_DONTKNOW;
        }
    }
    if (meIsLiblangtagNeeded == DECISION_NO)
    {
        meIsValid = DECISION_YES; // really, known must be valid ...
        return true; // that's it
    }
    meIsLiblangtagNeeded = DECISION_YES;
    SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: using liblangtag for " << maBcp47);

    if (!mpImplLangtag)
    {
        // Reference the liblangtag data before creating the first tag; the
        // matching decRef() happens where the tag is released.
        theDataRef::get().incRef();
        mpImplLangtag = lt_tag_new();
    }

    // ensure error is free'd
    struct myerror
    {
        lt_error_t* p;
        myerror() : p(NULL) {}
        ~myerror() { if (p) lt_error_unref( p); }
    } aError;

    if (lt_tag_parse( MPLANGTAG, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    {
        // pTag is released with free() on every path below.
        char* pTag = lt_tag_canonicalize( MPLANGTAG, &aError.p);
        SAL_WARN_IF( !pTag, "i18npool.langtag", "LanguageTag::canonicalize: could not canonicalize " << maBcp47);
        if (pTag)
        {
            OUString aOld( maBcp47);
            maBcp47 = OUString::createFromAscii( pTag);
            // Make the lt_tag_t follow the new string if different, which
            // removes default script and such.
            if (maBcp47 != aOld)
            {
                if (!lt_tag_parse( MPLANGTAG, pTag, &aError.p))
                {
                    SAL_WARN( "i18npool.langtag", "LanguageTag::canonicalize: could not reparse " << maBcp47);
                    free( pTag);
                    meIsValid = DECISION_NO;
                    return false;
                }
            }
            free( pTag);
            meIsValid = DECISION_YES;
            return true;
        }
    }
    else
    {
        SAL_INFO( "i18npool.langtag", "LanguageTag::canonicalize: could not parse " << maBcp47);
    }
    // Reached when parsing failed or canonicalization returned no string.
    meIsValid = DECISION_NO;
    return false;
}
523 :
524 :
525 7129 : void LanguageTag::convertLocaleToBcp47()
526 : {
527 7129 : if (mbSystemLocale && !mbInitializedLocale)
528 0 : convertLangToLocale();
529 :
530 7129 : if (maLocale.Language == ISO639_LANGUAGE_TAG)
531 : {
532 0 : maBcp47 = maLocale.Variant;
533 0 : meIsIsoLocale = DECISION_NO;
534 : }
535 : else
536 : {
537 : /* XXX NOTE: most legacy code never evaluated the Variant field, so for
538 : * now just concatenate language and country. In case we stumbled over
539 : * variant aware code we'd have to take care of that. */
540 7129 : if (maLocale.Country.isEmpty())
541 10 : maBcp47 = maLocale.Language;
542 : else
543 : {
544 7119 : OUStringBuffer aBuf( maLocale.Language.getLength() + 1 + maLocale.Country.getLength());
545 7119 : aBuf.append( maLocale.Language).append( '-').append( maLocale.Country);
546 7119 : maBcp47 = aBuf.makeStringAndClear();
547 : }
548 : }
549 7129 : mbInitializedBcp47 = true;
550 7129 : }
551 :
552 :
553 16883 : void LanguageTag::convertLocaleToLang()
554 : {
555 16883 : if (mbSystemLocale)
556 : {
557 104 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
558 : }
559 : else
560 : {
561 : /* FIXME: this is temporary until code base is converted to not use
562 : * MsLangId::convert...() anymore. After that, proper new method has to
563 : * be implemented to allow ISO639_LANGUAGE_TAG and sript tag and such. */
564 16779 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
565 : }
566 16883 : mbInitializedLangID = true;
567 16883 : }
568 :
569 :
570 551 : void LanguageTag::convertBcp47ToLocale()
571 : {
572 551 : bool bIso = isIsoLocale();
573 551 : if (bIso)
574 : {
575 549 : maLocale.Language = getLanguageFromLangtag();
576 549 : maLocale.Country = getRegionFromLangtag();
577 549 : maLocale.Variant = OUString();
578 : }
579 : else
580 : {
581 2 : maLocale.Language = ISO639_LANGUAGE_TAG;
582 2 : maLocale.Country = getCountry();
583 2 : maLocale.Variant = maBcp47;
584 : }
585 551 : mbInitializedLocale = true;
586 551 : }
587 :
588 :
589 78 : void LanguageTag::convertBcp47ToLang()
590 : {
591 78 : if (mbSystemLocale)
592 : {
593 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
594 : }
595 : else
596 : {
597 : /* FIXME: this is temporary. If we support locales that consist not
598 : * only of language and country, e.g. added script, this probably needs
599 : * to be adapted. */
600 78 : if (!mbInitializedLocale)
601 67 : convertBcp47ToLocale();
602 78 : convertLocaleToLang();
603 : }
604 78 : mbInitializedLangID = true;
605 78 : }
606 :
607 :
608 44380 : void LanguageTag::convertLangToLocale()
609 : {
610 44380 : if (mbSystemLocale && !mbInitializedLangID)
611 : {
612 4456 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
613 4456 : mbInitializedLangID = true;
614 : }
615 : /* FIXME: this is temporary until code base is converted to not use
616 : * MsLangId::convert...() anymore. After that, proper new method has to be
617 : * implemented to allow ISO639_LANGUAGE_TAG and script tag and such. */
618 : // Resolve system here!
619 44380 : maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
620 44380 : mbInitializedLocale = true;
621 44380 : }
622 :
623 :
624 3965 : void LanguageTag::convertLangToBcp47()
625 : {
626 : /* FIXME: this is temporary. If we support locales that consist not only of
627 : * language and country, e.g. added script, this probably needs to be
628 : * adapted. */
629 3965 : if (!mbInitializedLocale)
630 3965 : convertLangToLocale();
631 3965 : convertLocaleToBcp47();
632 3965 : mbInitializedBcp47 = true;
633 3965 : }
634 :
635 :
636 30079 : const rtl::OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
637 : {
638 30079 : if (!bResolveSystem && mbSystemLocale)
639 38 : return theEmptyBcp47::get();
640 30041 : if (!mbInitializedBcp47)
641 : {
642 7129 : if (mbInitializedLocale)
643 3164 : const_cast<LanguageTag*>(this)->convertLocaleToBcp47();
644 : else
645 3965 : const_cast<LanguageTag*>(this)->convertLangToBcp47();
646 : }
647 30041 : return maBcp47;
648 : }
649 :
650 :
651 7852 : rtl::OUString LanguageTag::getLanguageFromLangtag()
652 : {
653 7852 : OUString aLanguage;
654 7852 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
655 6640 : canonicalize();
656 7852 : if (maBcp47.isEmpty())
657 0 : return aLanguage;
658 7852 : if (mpImplLangtag)
659 : {
660 17 : const lt_lang_t* pLangT = lt_tag_get_language( MPLANGTAG);
661 : SAL_WARN_IF( !pLangT, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLangT==NULL");
662 17 : if (!pLangT)
663 0 : return aLanguage;
664 17 : const char* pLang = lt_lang_get_tag( pLangT);
665 : SAL_WARN_IF( !pLang, "i18npool.langtag", "LanguageTag::getLanguageFromLangtag: pLang==NULL");
666 17 : if (pLang)
667 17 : aLanguage = OUString::createFromAscii( pLang);
668 : }
669 : else
670 : {
671 7835 : if (mbCachedLanguage || cacheSimpleLSC())
672 7835 : aLanguage = maCachedLanguage;
673 : }
674 7852 : return aLanguage;
675 : }
676 :
677 :
678 4 : rtl::OUString LanguageTag::getScriptFromLangtag()
679 : {
680 4 : OUString aScript;
681 4 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
682 0 : canonicalize();
683 4 : if (maBcp47.isEmpty())
684 0 : return aScript;
685 4 : if (mpImplLangtag)
686 : {
687 4 : const lt_script_t* pScriptT = lt_tag_get_script( MPLANGTAG);
688 : // pScriptT==NULL is valid for default scripts
689 4 : if (!pScriptT)
690 3 : return aScript;
691 1 : const char* pScript = lt_script_get_tag( pScriptT);
692 : SAL_WARN_IF( !pScript, "i18npool.langtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
693 1 : if (pScript)
694 1 : aScript = OUString::createFromAscii( pScript);
695 : }
696 : else
697 : {
698 0 : if (mbCachedScript || cacheSimpleLSC())
699 0 : aScript = maCachedScript;
700 : }
701 1 : return aScript;
702 : }
703 :
704 :
705 1215 : rtl::OUString LanguageTag::getRegionFromLangtag()
706 : {
707 1215 : OUString aRegion;
708 1215 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
709 0 : canonicalize();
710 1215 : if (maBcp47.isEmpty())
711 0 : return aRegion;
712 1215 : if (mpImplLangtag)
713 : {
714 20 : const lt_region_t* pRegionT = lt_tag_get_region( MPLANGTAG);
715 : // pRegionT==NULL is valid for language only tags, rough check here
716 : // that does not take sophisticated tags into account that actually
717 : // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
718 : // that ll-CC and lll-CC actually fail.
719 : SAL_WARN_IF( !pRegionT &&
720 : maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
721 : maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
722 : "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL");
723 20 : if (!pRegionT)
724 15 : return aRegion;
725 5 : const char* pRegion = lt_region_get_tag( pRegionT);
726 : SAL_WARN_IF( !pRegion, "i18npool.langtag", "LanguageTag::getRegionFromLangtag: pRegion==NULL");
727 5 : if (pRegion)
728 5 : aRegion = OUString::createFromAscii( pRegion);
729 : }
730 : else
731 : {
732 1195 : if (mbCachedCountry || cacheSimpleLSC())
733 1195 : aRegion = maCachedCountry;
734 : }
735 1200 : return aRegion;
736 : }
737 :
738 :
739 260591 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
740 : {
741 260591 : if (!bResolveSystem && mbSystemLocale)
742 12 : return theEmptyLocale::get();
743 260579 : if (!mbInitializedLocale)
744 : {
745 40899 : if (mbInitializedBcp47)
746 484 : const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
747 : else
748 40415 : const_cast<LanguageTag*>(this)->convertLangToLocale();
749 : }
750 260579 : return maLocale;
751 : }
752 :
753 :
754 354002 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
755 : {
756 354002 : if (!bResolveSystem && mbSystemLocale)
757 4961 : return LANGUAGE_SYSTEM;
758 349041 : if (!mbInitializedLangID)
759 : {
760 13721 : if (mbInitializedBcp47)
761 78 : const_cast<LanguageTag*>(this)->convertBcp47ToLang();
762 : else
763 13643 : const_cast<LanguageTag*>(this)->convertLocaleToLang();
764 : }
765 349041 : return mnLangID;
766 : }
767 :
768 :
769 113 : void LanguageTag::getIsoLanguageCountry( rtl::OUString& rLanguage, rtl::OUString& rCountry ) const
770 : {
771 113 : if (!isIsoLocale())
772 : {
773 0 : rLanguage = OUString();
774 0 : rCountry = OUString();
775 113 : return;
776 : }
777 : // After isIsoLocale() it's safe to call getLanguage() for ISO code.
778 113 : rLanguage = getLanguage();
779 113 : rCountry = getCountry();
780 : }
781 :
782 :
783 : namespace
784 : {
785 :
786 1341 : bool isLowerAscii( sal_Unicode c )
787 : {
788 1341 : return 'a' <= c && c <= 'z';
789 : }
790 :
791 1310 : bool isUpperAscii( sal_Unicode c )
792 : {
793 1310 : return 'A' <= c && c <= 'Z';
794 : }
795 :
796 : }
797 :
798 :
799 : // static
800 663 : bool LanguageTag::isIsoLanguage( const rtl::OUString& rLanguage )
801 : {
802 : /* TODO: ignore case? For now let's see where rubbish is used. */
803 : bool b2chars;
804 1998 : if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
805 1326 : isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
806 9 : (b2chars || isLowerAscii( rLanguage[2])))
807 663 : return true;
808 : SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
809 : (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
810 : (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18npool.langtag",
811 : "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
812 0 : return false;
813 : }
814 :
815 :
816 : // static
817 666 : bool LanguageTag::isIsoCountry( const rtl::OUString& rRegion )
818 : {
819 : /* TODO: ignore case? For now let's see where rubbish is used. */
820 2628 : if (rRegion.isEmpty() ||
821 1962 : (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
822 666 : return true;
823 : SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
824 : "i18npool.langtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
825 0 : return false;
826 : }
827 :
828 :
829 : // static
830 6 : bool LanguageTag::isIsoScript( const rtl::OUString& rScript )
831 : {
832 : /* TODO: ignore case? For now let's see where rubbish is used. */
833 16 : if (rScript.isEmpty() ||
834 2 : (rScript.getLength() == 4 &&
835 4 : isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
836 4 : isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
837 6 : return true;
838 : SAL_WARN_IF( rScript.getLength() == 4 &&
839 : (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
840 : isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
841 : "i18npool.langtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
842 0 : return false;
843 : }
844 :
845 :
846 7432 : rtl::OUString LanguageTag::getLanguage() const
847 : {
848 7432 : if (!mbCachedLanguage)
849 : {
850 7303 : maCachedLanguage = const_cast<LanguageTag*>(this)->getLanguageFromLangtag();
851 7303 : mbCachedLanguage = true;
852 : }
853 7432 : return maCachedLanguage;
854 : }
855 :
856 :
857 3492 : rtl::OUString LanguageTag::getScript() const
858 : {
859 3492 : if (!mbCachedScript)
860 : {
861 4 : maCachedScript = const_cast<LanguageTag*>(this)->getScriptFromLangtag();
862 4 : mbCachedScript = true;
863 : }
864 3492 : return maCachedScript;
865 : }
866 :
867 :
868 3483 : rtl::OUString LanguageTag::getLanguageAndScript() const
869 : {
870 3483 : OUString aLanguageScript( getLanguage());
871 3483 : OUString aScript( getScript());
872 3483 : if (!aScript.isEmpty())
873 : {
874 1 : OUStringBuffer aBuf( aLanguageScript.getLength() + 1 + aScript.getLength());
875 1 : aBuf.append( aLanguageScript).append( '-').append( aScript);
876 1 : aLanguageScript = aBuf.makeStringAndClear();
877 : }
878 3483 : return aLanguageScript;
879 : }
880 :
881 :
882 3288 : rtl::OUString LanguageTag::getCountry() const
883 : {
884 3288 : if (!mbCachedCountry)
885 : {
886 3 : maCachedCountry = const_cast<LanguageTag*>(this)->getRegionFromLangtag();
887 3 : if (!isIsoCountry( maCachedCountry))
888 0 : maCachedCountry = OUString();
889 3 : mbCachedCountry = true;
890 : }
891 3288 : return maCachedCountry;
892 : }
893 :
894 :
895 663 : rtl::OUString LanguageTag::getRegion() const
896 : {
897 663 : return const_cast<LanguageTag*>(this)->getRegionFromLangtag();
898 : }
899 :
900 :
901 7294 : bool LanguageTag::cacheSimpleLSC()
902 : {
903 7294 : OUString aLanguage, aScript, aCountry;
904 7294 : bool bRet = simpleExtract( maBcp47, aLanguage, aScript, aCountry);
905 7294 : if (bRet)
906 : {
907 7294 : maCachedLanguage = aLanguage;
908 7294 : maCachedScript = aScript;
909 7294 : maCachedCountry = aCountry;
910 7294 : mbCachedLanguage = mbCachedScript = mbCachedCountry = true;
911 : }
912 7294 : return bRet;
913 : }
914 :
915 :
916 674 : bool LanguageTag::isIsoLocale() const
917 : {
918 674 : if (meIsIsoLocale == DECISION_DONTKNOW)
919 : {
920 664 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
921 658 : const_cast<LanguageTag*>(this)->canonicalize();
922 : // It must be at most ll-CC or lll-CC
923 : // Do not use getCountry() here, use getRegion() instead.
924 664 : meIsIsoLocale = ((maBcp47.isEmpty() ||
925 3976 : (maBcp47.getLength() <= 6 && isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()))) ?
926 4640 : DECISION_YES : DECISION_NO);
927 : }
928 674 : return meIsIsoLocale == DECISION_YES;
929 : }
930 :
931 :
932 5 : bool LanguageTag::isIsoODF() const
933 : {
934 5 : if (meIsIsoODF == DECISION_DONTKNOW)
935 : {
936 5 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
937 0 : const_cast<LanguageTag*>(this)->canonicalize();
938 5 : if (!isIsoScript( getScript()))
939 0 : return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
940 : // The usual case is lll-CC so simply check that first.
941 5 : if (isIsoLocale())
942 3 : return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
943 : // If this is not ISO locale for which script must not exist it can
944 : // still be ISO locale plus ISO script lll-Ssss-CC
945 2 : meIsIsoODF = ((maBcp47.getLength() <= 11 &&
946 8 : isIsoLanguage( getLanguage()) && isIsoCountry( getRegion()) && isIsoScript( getScript())) ?
947 10 : DECISION_YES : DECISION_NO);
948 : }
949 2 : return meIsIsoODF == DECISION_YES;
950 : }
951 :
952 :
953 5 : bool LanguageTag::isValidBcp47() const
954 : {
955 5 : if (meIsValid == DECISION_DONTKNOW)
956 : {
957 0 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
958 0 : const_cast<LanguageTag*>(this)->canonicalize();
959 : SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18npool.langtag",
960 : "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
961 : }
962 5 : return meIsValid == DECISION_YES;
963 : }
964 :
965 :
966 333571 : bool LanguageTag::isSystemLocale() const
967 : {
968 333571 : return mbSystemLocale;
969 : }
970 :
971 :
972 19 : LanguageTag & LanguageTag::makeFallback()
973 : {
974 19 : if (!mbIsFallback)
975 : {
976 19 : if (mbInitializedLangID)
977 : {
978 0 : LanguageType nLang1 = getLanguageType();
979 0 : LanguageType nLang2 = MsLangId::Conversion::lookupFallbackLanguage( nLang1);
980 0 : if (nLang1 != nLang2)
981 0 : reset( nLang2);
982 : }
983 : else
984 : {
985 19 : const lang::Locale& rLocale1 = getLocale();
986 19 : lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
987 57 : if ( rLocale1.Language != aLocale2.Language ||
988 19 : rLocale1.Country != aLocale2.Country ||
989 19 : rLocale1.Variant != aLocale2.Variant)
990 0 : reset( aLocale2);
991 : }
992 19 : mbIsFallback = true;
993 : }
994 19 : return *this;
995 : }
996 :
997 :
998 9455 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
999 : {
1000 : // Compare full language tag strings but SYSTEM unresolved.
1001 9455 : return getBcp47( false) == rLanguageTag.getBcp47( false);
1002 : }
1003 :
1004 :
1005 9455 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
1006 : {
1007 9455 : return !operator==( rLanguageTag);
1008 : }
1009 :
1010 :
1011 : // static
1012 7845 : bool LanguageTag::simpleExtract( const rtl::OUString& rBcp47,
1013 : rtl::OUString& rLanguage,
1014 : rtl::OUString& rScript,
1015 : rtl::OUString& rCountry )
1016 : {
1017 7845 : bool bRet = false;
1018 7845 : const sal_Int32 nLen = rBcp47.getLength();
1019 7845 : const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
1020 7845 : if ((nLen == 2 || nLen == 3) && nHyph1 < 0) // ll or lll
1021 : {
1022 18 : rLanguage = rBcp47;
1023 18 : rScript = rCountry = OUString();
1024 18 : bRet = true;
1025 : }
1026 7827 : else if ( (nLen == 5 && nHyph1 == 2) // ll-CC
1027 : || (nLen == 6 && nHyph1 == 3)) // lll-CC
1028 : {
1029 7817 : rLanguage = rBcp47.copy( 0, nHyph1);
1030 7817 : rCountry = rBcp47.copy( nHyph1 + 1, 2);
1031 7817 : rScript = OUString();
1032 7817 : bRet = true;
1033 : }
1034 10 : else if ( (nHyph1 == 2 && nLen == 10) // ll-Ssss-CC check
1035 : || (nHyph1 == 3 && nLen == 11)) // lll-Ssss-CC check
1036 : {
1037 2 : const sal_Int32 nHyph2 = rBcp47.indexOf( '-', nHyph1 + 1);
1038 2 : if (nHyph2 == nHyph1 + 5)
1039 : {
1040 2 : rLanguage = rBcp47.copy( 0, nHyph1);
1041 2 : rScript = rBcp47.copy( nHyph1 + 1, 4);
1042 2 : rCountry = rBcp47.copy( nHyph2 + 1, 2);
1043 2 : bRet = true;
1044 : }
1045 : }
1046 7845 : if (!bRet)
1047 8 : rLanguage = rScript = rCountry = OUString();
1048 7845 : return bRet;
1049 : }
1050 :
1051 :
1052 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|