Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include <config_folders.h>
11 :
12 : #include "i18nlangtag/languagetag.hxx"
13 : #include "i18nlangtag/mslangid.hxx"
14 : #include <rtl/ustrbuf.hxx>
15 : #include <rtl/bootstrap.hxx>
16 : #include <osl/file.hxx>
17 : #include <rtl/instance.hxx>
18 : #include <rtl/locale.h>
19 : #include <boost/unordered_set.hpp>
20 : #include <map>
21 :
22 : //#define erDEBUG
23 :
24 : #if defined(ENABLE_LIBLANGTAG)
25 : #include <liblangtag/langtag.h>
26 : #else
27 : /* Replacement code for LGPL phobic and Android systems.
28 : * For iOS we could probably use NSLocale instead, that should have more or
29 : * less required functionality. If it is good enough, it could be used for Mac
30 : * OS X, too.
31 : */
32 : #include "simple-langtag.cxx"
33 : #endif
34 :
35 : using namespace com::sun::star;
36 :
37 :
38 : // Helper to ensure lt_error_t is free'd
39 : struct myLtError
40 : {
41 : lt_error_t* p;
42 13 : myLtError() : p(NULL) {}
43 13 : ~myLtError() { if (p) lt_error_unref( p); }
44 : };
45 :
46 : // "statics" to be returned as const reference to an empty locale and string.
namespace {
// Lazily created, process-wide empty Locale instance.
struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
// Lazily created, process-wide empty BCP 47 string instance.
struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
}

// Set of language tag strings; filled by getKnowns() with the fallback
// strings of all language tags defined by MsLangId.
typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet;
namespace {
// The one KnownTagSet instance returned by getKnowns().
struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
// Mutex shared by the functions of this file that modify the static
// registries (known tags, on-the-fly IDs, impl maps).
struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
}
57 :
58 8 : static const KnownTagSet & getKnowns()
59 : {
60 8 : KnownTagSet & rKnowns = theKnowns::get();
61 8 : if (rKnowns.empty())
62 : {
63 5 : osl::MutexGuard aGuard( theMutex::get());
64 5 : if (rKnowns.empty())
65 : {
66 5 : ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
67 8250 : for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
68 5500 : it != aDefined.end(); ++it)
69 : {
70 : // Do not use the BCP47 string here to initialize the
71 : // LanguageTag because then canonicalize() would call this
72 : // getKnowns() again..
73 2745 : ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings( true));
74 9075 : for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
75 : {
76 6330 : rKnowns.insert( *fb);
77 : }
78 2750 : }
79 5 : }
80 : }
81 8 : return rKnowns;
82 : }
83 :
84 :
namespace {
// "Less than" comparator for OUString keys that ignores ASCII case, so that
// map lookups of BCP 47 strings are case insensitive.
struct compareIgnoreAsciiCaseLess
{
    bool operator()( const OUString& r1, const OUString& r2 ) const
    {
        return r1.compareToIgnoreAsciiCase( r2) < 0;
    }
};
// Registry of implementations keyed by BCP 47 string (ASCII case insensitive).
typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
// Registry of implementations keyed by LangID.
typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
// Shared implementation used for all LANGUAGE_DONTKNOW tags (see registerImpl()).
struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
// Implementation of the resolved system locale; reset by
// setConfiguredSystemLanguage().
struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
}
100 :
101 :
102 94 : static LanguageType getNextOnTheFlyLanguage()
103 : {
104 : static LanguageType nOnTheFlyLanguage = 0;
105 94 : osl::MutexGuard aGuard( theMutex::get());
106 94 : if (!nOnTheFlyLanguage)
107 26 : nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
108 : else
109 : {
110 68 : if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
111 68 : ++nOnTheFlyLanguage;
112 : else
113 : {
114 0 : LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
115 0 : if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
116 0 : nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
117 : else
118 : {
119 : SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
120 : << ((LANGUAGE_ON_THE_FLY_END - LANGUAGE_ON_THE_FLY_START + 1)
121 : * (LANGUAGE_ON_THE_FLY_SUB_END - LANGUAGE_ON_THE_FLY_SUB_START + 1))
122 : << " consumed?!?)");
123 0 : return 0;
124 : }
125 : }
126 : }
127 : #if OSL_DEBUG_LEVEL > 0
128 : static size_t nOnTheFlies = 0;
129 : ++nOnTheFlies;
130 : SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
131 : #endif
132 94 : return nOnTheFlyLanguage;
133 : }
134 :
135 :
136 : // static
137 222599 : bool LanguageTag::isOnTheFlyID( LanguageType nLang )
138 : {
139 222599 : LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
140 222599 : LanguageType nSub = MsLangId::getSubLanguage( nLang);
141 : return
142 1005 : LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
143 223604 : LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
144 : }
145 :
146 :
147 : /** A reference holder for liblangtag data de/initialization, one static
148 : instance. Currently implemented such that the first "ref" inits and dtor
149 : (our library deinitialized) tears down.
150 : */
151 : class LiblantagDataRef
152 : {
153 : public:
154 : LiblantagDataRef();
155 : ~LiblantagDataRef();
156 11 : inline void incRef()
157 : {
158 11 : if (mnRef != SAL_MAX_UINT32 && !mnRef++)
159 5 : setup();
160 11 : }
161 16 : inline void decRef()
162 : {
163 16 : if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
164 5 : teardown();
165 16 : }
166 : private:
167 : OString maDataPath; // path to liblangtag data, "|" if system
168 : sal_uInt32 mnRef;
169 :
170 : void setupDataPath();
171 : void setup();
172 : void teardown();
173 : };
174 :
namespace {
// The single LiblantagDataRef instance; LanguageTagImpl increments and
// decrements it for each liblangtag tag it holds.
struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
}
178 :
179 5 : LiblantagDataRef::LiblantagDataRef()
180 : :
181 5 : mnRef(0)
182 : {
183 5 : }
184 :
185 10 : LiblantagDataRef::~LiblantagDataRef()
186 : {
187 : // When destructed we're tearing down unconditionally.
188 5 : if (mnRef)
189 5 : mnRef = 1;
190 5 : decRef();
191 5 : }
192 :
193 5 : void LiblantagDataRef::setup()
194 : {
195 : SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
196 5 : if (maDataPath.isEmpty())
197 5 : setupDataPath();
198 5 : lt_db_initialize();
199 : // Hold ref eternally.
200 5 : mnRef = SAL_MAX_UINT32;
201 5 : }
202 :
203 5 : void LiblantagDataRef::teardown()
204 : {
205 : SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
206 5 : lt_db_finalize();
207 5 : }
208 :
209 5 : void LiblantagDataRef::setupDataPath()
210 : {
211 : // maDataPath is assumed to be empty here.
212 5 : OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
213 5 : rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
214 :
215 : // Check if data is in our own installation, else assume system
216 : // installation.
217 10 : OUString aData( aURL);
218 5 : aData += "/language-subtag-registry.xml";
219 10 : osl::DirectoryItem aDirItem;
220 5 : if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
221 : {
222 0 : OUString aPath;
223 0 : if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
224 0 : maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
225 : }
226 5 : if (maDataPath.isEmpty())
227 5 : maDataPath = "|"; // assume system
228 : else
229 5 : lt_db_set_datadir( maDataPath.getStr());
230 5 : }
231 :
232 :
233 : /* TODO: we could transform known vendor and browser-specific variants to known
234 : * BCP 47 if available. For now just remove them to not confuse any later
235 : * treatments that check for empty variants. This vendor stuff was never
236 : * supported anyway. */
237 570626 : static void handleVendorVariant( com::sun::star::lang::Locale & rLocale )
238 : {
239 570626 : if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
240 7 : rLocale.Variant = OUString();
241 570626 : }
242 :
243 :
/** Implementation object behind LanguageTag.

    Instances are created from a LanguageTag in LanguageTag::registerImpl()
    and kept in the static BCP 47 and LangID maps, held by
    LanguageTag::ImplPtr. All members are private; LanguageTag is a friend.
 */
class LanguageTagImpl
{
public:

    /// Takes over locale, BCP 47 string, LangID and initialization flags.
    explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
    /// Copies all state; a held liblangtag tag is duplicated via lt_tag_copy().
    explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
    ~LanguageTagImpl();
    LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );

private:

    friend class LanguageTag;

    /// Tri-state for lazily evaluated properties.
    enum Decision
    {
        DECISION_DONTKNOW,
        DECISION_NO,
        DECISION_YES
    };

    mutable com::sun::star::lang::Locale    maLocale;
    mutable OUString                        maBcp47;
    mutable OUString                        maCachedLanguage;   ///< cache getLanguage()
    mutable OUString                        maCachedScript;     ///< cache getScript()
    mutable OUString                        maCachedCountry;    ///< cache getCountry()
    mutable OUString                        maCachedVariants;   ///< cache getVariants()
    mutable lt_tag_t*                       mpImplLangtag;      ///< liblangtag pointer
    mutable LanguageType                    mnLangID;
    mutable Decision                        meIsValid;
    mutable Decision                        meIsIsoLocale;
    mutable Decision                        meIsIsoODF;
    mutable Decision                        meIsLiblangtagNeeded;   ///< whether processing with liblangtag needed
            bool                            mbSystemLocale      : 1;
    mutable bool                            mbInitializedBcp47  : 1;
    mutable bool                            mbInitializedLocale : 1;
    mutable bool                            mbInitializedLangID : 1;
    mutable bool                            mbCachedLanguage    : 1;
    mutable bool                            mbCachedScript      : 1;
    mutable bool                            mbCachedCountry     : 1;
    mutable bool                            mbCachedVariants    : 1;

    const OUString &    getBcp47() const;
    OUString            getLanguage() const;
    OUString            getScript() const;
    OUString            getCountry() const;
    OUString            getRegion() const;
    OUString            getVariants() const;
    bool                hasScript() const;

    bool                isIsoLocale() const;
    bool                isIsoODF() const;
    bool                isValidBcp47() const;

    // Conversions between the three representations (Locale, BCP 47 string,
    // LangID); each is named after its source and target.
    void                convertLocaleToBcp47();
    void                convertLocaleToLang( bool bAllowOnTheFlyID );
    void                convertBcp47ToLocale();
    void                convertBcp47ToLang();
    void                convertLangToLocale();
    void                convertLangToBcp47();

    /** @return whether BCP 47 language tag string was changed. */
    bool                canonicalize();

    /** Canonicalize if not yet done and synchronize initialized conversions.

        @return whether BCP 47 language tag string was changed.
     */
    bool                synCanonicalize();

    OUString            getLanguageFromLangtag();
    OUString            getScriptFromLangtag();
    OUString            getRegionFromLangtag();
    OUString            getVariantsFromLangtag();

    /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.

        @param  nRegisterID
                If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
                instead of generating an on-the-fly ID. Implementation may
                still generate an ID if the suggested ID is already used for
                another language tag.

        @return NULL if no ID could be obtained or registration failed.
     */
    LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );

    /** Obtain Language, Script, Country and Variants via simpleExtract() and
        assign them to the cached variables if successful.

        @return return of simpleExtract()
     */
    bool                cacheSimpleLSCV();

    /// Result kinds of simpleExtract().
    enum Extraction
    {
        EXTRACTED_NONE,
        EXTRACTED_LSC,
        EXTRACTED_LV,
        EXTRACTED_X,
        EXTRACTED_X_JOKER
    };

    /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
        portions.

        Does not check case or content!

        @return EXTRACTED_LSC if simple tag was detected (i.e. one that
                would fulfill the isIsoODF() condition),
                EXTRACTED_LV if a tag with variant was detected,
                EXTRACTED_X if x-... privateuse tag was detected,
                EXTRACTED_X_JOKER if "*" joker was detected,
                EXTRACTED_NONE else.
     */
    static Extraction   simpleExtract( const OUString& rBcp47,
                                       OUString& rLanguage,
                                       OUString& rScript,
                                       OUString& rCountry,
                                       OUString& rVariants );

    /** Convert Locale to BCP 47 string without resolving system and creating
        temporary LanguageTag instances. */
    static OUString     convertToBcp47( const com::sun::star::lang::Locale& rLocale );
};
368 :
369 :
// Creates an implementation from the current state of a LanguageTag: locale,
// BCP 47 string, LangID and the system/initialized flags are taken over.
// No liblangtag tag is held yet, all decisions are DECISION_DONTKNOW and none
// of the cached parts is valid (the cache strings are default constructed).
LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
    :
        maLocale( rLanguageTag.maLocale),
        maBcp47( rLanguageTag.maBcp47),
        mpImplLangtag( NULL),
        mnLangID( rLanguageTag.mnLangID),
        meIsValid( DECISION_DONTKNOW),
        meIsIsoLocale( DECISION_DONTKNOW),
        meIsIsoODF( DECISION_DONTKNOW),
        meIsLiblangtagNeeded( DECISION_DONTKNOW),
        mbSystemLocale( rLanguageTag.mbSystemLocale),
        mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
        mbInitializedLocale( rLanguageTag.mbInitializedLocale),
        mbInitializedLangID( rLanguageTag.mbInitializedLangID),
        mbCachedLanguage( false),
        mbCachedScript( false),
        mbCachedCountry( false),
        mbCachedVariants( false)
{
}
390 :
391 :
// Copy constructor: duplicates all state including caches and decisions.
// If the source holds a liblangtag tag, it is copied with lt_tag_copy() and
// the liblangtag data reference is incremented for the new holder.
LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
    :
        maLocale( rLanguageTagImpl.maLocale),
        maBcp47( rLanguageTagImpl.maBcp47),
        maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
        maCachedScript( rLanguageTagImpl.maCachedScript),
        maCachedCountry( rLanguageTagImpl.maCachedCountry),
        maCachedVariants( rLanguageTagImpl.maCachedVariants),
        mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
                lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL),
        mnLangID( rLanguageTagImpl.mnLangID),
        meIsValid( rLanguageTagImpl.meIsValid),
        meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
        meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
        meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
        mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
        mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
        mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
        mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
        mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
        mbCachedScript( rLanguageTagImpl.mbCachedScript),
        mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
        mbCachedVariants( rLanguageTagImpl.mbCachedVariants)
{
    // Each impl holding a liblangtag tag accounts for one data reference.
    if (mpImplLangtag)
        theDataRef::get().incRef();
}
419 :
420 :
421 0 : LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
422 : {
423 0 : if (&rLanguageTagImpl == this)
424 0 : return *this;
425 :
426 0 : maLocale = rLanguageTagImpl.maLocale;
427 0 : maBcp47 = rLanguageTagImpl.maBcp47;
428 0 : maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
429 0 : maCachedScript = rLanguageTagImpl.maCachedScript;
430 0 : maCachedCountry = rLanguageTagImpl.maCachedCountry;
431 0 : maCachedVariants = rLanguageTagImpl.maCachedVariants;
432 : mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
433 0 : lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL;
434 0 : mnLangID = rLanguageTagImpl.mnLangID;
435 0 : meIsValid = rLanguageTagImpl.meIsValid;
436 0 : meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
437 0 : meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
438 0 : meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
439 0 : mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
440 0 : mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
441 0 : mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
442 0 : mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
443 0 : mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
444 0 : mbCachedScript = rLanguageTagImpl.mbCachedScript;
445 0 : mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
446 0 : mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
447 0 : if (mpImplLangtag)
448 0 : theDataRef::get().incRef();
449 0 : return *this;
450 : }
451 :
452 :
453 485152 : LanguageTagImpl::~LanguageTagImpl()
454 : {
455 242576 : if (mpImplLangtag)
456 : {
457 11 : lt_tag_unref( mpImplLangtag);
458 11 : theDataRef::get().decRef();
459 : }
460 242576 : }
461 :
462 :
// Constructs from a BCP 47 language tag string. An empty string denotes the
// system locale, in which case the BCP 47 string counts as not initialized.
// If bCanonicalize is set, the implementation is obtained and canonicalized
// right away and its state is synchronized back into this instance.
// Note: mbInitializedBcp47 is derived from mbSystemLocale, so it relies on
// mbSystemLocale being initialized before it.
LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
    :
        maBcp47( rBcp47LanguageTag),
        mnLangID( LANGUAGE_DONTKNOW),
        mbSystemLocale( rBcp47LanguageTag.isEmpty()),
        mbInitializedBcp47( !mbSystemLocale),
        mbInitializedLocale( false),
        mbInitializedLangID( false),
        mbIsFallback( false)
{
    if (bCanonicalize)
    {
        getImpl()->canonicalize();
        // Registration itself may already have canonicalized, so do an
        // unconditional sync.
        syncFromImpl();
    }

}
482 :
483 :
// Constructs from a Locale. An empty Language denotes the system locale, in
// which case the locale counts as not initialized. Vendor variants are
// stripped from the stored copy via handleVendorVariant().
// Note: mbInitializedLocale is derived from mbSystemLocale, so it relies on
// mbSystemLocale being initialized before it.
LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
    :
        maLocale( rLocale),
        mnLangID( LANGUAGE_DONTKNOW),
        mbSystemLocale( rLocale.Language.isEmpty()),
        mbInitializedBcp47( false),
        mbInitializedLocale( !mbSystemLocale),
        mbInitializedLangID( false),
        mbIsFallback( false)
{
    handleVendorVariant( maLocale);
}
496 :
497 :
// Constructs from a LangID. LANGUAGE_SYSTEM denotes the system locale, in
// which case the LangID counts as not initialized.
// Note: mbInitializedLangID is derived from mbSystemLocale, so it relies on
// mbSystemLocale being initialized before it.
LanguageTag::LanguageTag( LanguageType nLanguage )
    :
        mnLangID( nLanguage),
        mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
        mbInitializedBcp47( false),
        mbInitializedLocale( false),
        mbInitializedLangID( !mbSystemLocale),
        mbIsFallback( false)
{
}
508 :
509 :
510 554 : LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
511 : const OUString& rScript, const OUString& rCountry )
512 : :
513 : maBcp47( rBcp47),
514 : mnLangID( LANGUAGE_DONTKNOW),
515 554 : mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
516 554 : mbInitializedBcp47( !rBcp47.isEmpty()),
517 : mbInitializedLocale( false),
518 : mbInitializedLangID( false),
519 1662 : mbIsFallback( false)
520 : {
521 554 : if (!mbSystemLocale && !mbInitializedBcp47)
522 : {
523 554 : if (rScript.isEmpty())
524 : {
525 554 : maBcp47 = rLanguage + "-" + rCountry;
526 554 : mbInitializedBcp47 = true;
527 554 : maLocale.Language = rLanguage;
528 554 : maLocale.Country = rCountry;
529 554 : mbInitializedLocale = true;
530 : }
531 : else
532 : {
533 0 : if (rCountry.isEmpty())
534 0 : maBcp47 = rLanguage + "-" + rScript;
535 : else
536 0 : maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
537 0 : mbInitializedBcp47 = true;
538 0 : maLocale.Language = I18NLANGTAG_QLT;
539 0 : maLocale.Country = rCountry;
540 0 : maLocale.Variant = maBcp47;
541 0 : mbInitializedLocale = true;
542 : }
543 : }
544 554 : }
545 :
546 :
// Constructs from an rtl_Locale by copying its Language, Country and Variant
// into maLocale, then lets convertFromRtlLocale() post-process the result.
// An empty Language denotes the system locale.
// Note: mbSystemLocale is derived from maLocale and mbInitializedLocale from
// mbSystemLocale, so both rely on the member initialization order.
LanguageTag::LanguageTag( const rtl_Locale & rLocale )
    :
        maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
        mnLangID( LANGUAGE_DONTKNOW),
        mbSystemLocale( maLocale.Language.isEmpty()),
        mbInitializedBcp47( false),
        mbInitializedLocale( !mbSystemLocale),
        mbInitializedLangID( false),
        mbIsFallback( false)
{
    convertFromRtlLocale();
}
559 :
560 :
// Copy constructor: copies all members; the implementation pointer is copied
// as well, so both instances refer to the same LanguageTagImpl.
LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
    :
        maLocale( rLanguageTag.maLocale),
        maBcp47( rLanguageTag.maBcp47),
        mnLangID( rLanguageTag.mnLangID),
        mpImpl( rLanguageTag.mpImpl),
        mbSystemLocale( rLanguageTag.mbSystemLocale),
        mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
        mbInitializedLocale( rLanguageTag.mbInitializedLocale),
        mbInitializedLangID( rLanguageTag.mbInitializedLangID),
        mbIsFallback(rLanguageTag.mbIsFallback)
{
}
574 :
575 :
576 109105 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
577 : {
578 109105 : if (&rLanguageTag == this)
579 0 : return *this;
580 :
581 109105 : maLocale = rLanguageTag.maLocale;
582 109105 : maBcp47 = rLanguageTag.maBcp47;
583 109105 : mnLangID = rLanguageTag.mnLangID;
584 109105 : mpImpl = rLanguageTag.mpImpl;
585 109105 : mbSystemLocale = rLanguageTag.mbSystemLocale;
586 109105 : mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
587 109105 : mbInitializedLocale = rLanguageTag.mbInitializedLocale;
588 109105 : mbInitializedLangID = rLanguageTag.mbInitializedLangID;
589 109105 : return *this;
590 : }
591 :
592 :
// Nothing to do explicitly; members clean up after themselves.
LanguageTag::~LanguageTag()
{
}
596 :
597 :
// Registers this implementation's BCP 47 string together with a LangID in the
// static maps. The LangID is nRegisterID if that is usable, otherwise a newly
// generated on-the-fly ID. Returns the implementation that is registered for
// the BCP 47 string, which may be an already existing one instead of a new
// copy of this instance; returns an empty pointer if there is no BCP 47
// string to register.
LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
{
    LanguageTag::ImplPtr pImpl;

    // A BCP 47 string is required; derive it from the locale if necessary.
    if (!mbInitializedBcp47)
    {
        if (mbInitializedLocale)
        {
            maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
            mbInitializedBcp47 = !maBcp47.isEmpty();
        }
    }
    if (maBcp47.isEmpty())
    {
        SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
        return pImpl;
    }

    // The maps are modified below, hold the mutex from here on.
    osl::MutexGuard aGuard( theMutex::get());

    // Step 1: find or create the implementation for the BCP 47 string.
    MapBcp47& rMapBcp47 = theMapBcp47::get();
    MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
    bool bOtherImpl = false;
    if (it != rMapBcp47.end())
    {
        SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
        pImpl = (*it).second;
        if (pImpl.get() != this)
        {
            // Could happen for example if during registerImpl() the tag was
            // changed via canonicalize() and the result was already present in
            // the map before, for example 'bn-Beng' => 'bn'. This specific
            // case is now taken care of in registerImpl() and doesn't reach
            // here. However, use the already existing impl if it matches.
            SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
            *this = *pImpl;     // ensure consistency
            bOtherImpl = true;
        }
    }
    else
    {
        SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
        // Not known yet: register a copy of this instance.
        pImpl.reset( new LanguageTagImpl( *this));
        rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
    }

    // Step 2: determine the LangID, unless an already existing other impl
    // brought one along.
    if (!bOtherImpl || !pImpl->mbInitializedLangID)
    {
        if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
            nRegisterID = getNextOnTheFlyLanguage();
        else
        {
            // Accept a suggested ID only if it is not mapped yet to something
            // different, otherwise we would end up with ambiguous assignments
            // of different language tags, for example for the same primary
            // LangID with "no", "nb" and "nn".
            const MapLangID& rMapLangID = theMapLangID::get();
            MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
            if (itID != rMapLangID.end())
            {
                if ((*itID).second->maBcp47 != maBcp47)
                {
                    SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
                            << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
                            << (*itID).second->maBcp47 << "'");
                    nRegisterID = getNextOnTheFlyLanguage();
                }
                else
                {
                    SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
                            << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
                }
            }
        }
        if (!nRegisterID)
        {
            // out of IDs, nothing to register
            // NOTE(review): pImpl is non-empty at this point although the
            // declaration documents a NULL return if no ID could be obtained;
            // confirm which behavior callers expect.
            return pImpl;
        }
        pImpl->mnLangID = nRegisterID;
        pImpl->mbInitializedLangID = true;
        // Keep this instance in sync if the registered impl is another object.
        if (pImpl.get() != this)
        {
            mnLangID = nRegisterID;
            mbInitializedLangID = true;
        }
    }

    // Step 3: cross-insert into the LangID map; an existing entry for that
    // LangID is left as is.
    ::std::pair< MapLangID::const_iterator, bool > res(
            theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
    if (res.second)
    {
        SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
                << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
    }
    else
    {
        SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
                << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
                << (*res.first).second->maBcp47 << "'");
    }

    return pImpl;
}
702 :
703 : // static
704 153 : void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
705 : {
706 153 : if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
707 : {
708 : SAL_WARN( "i18nlangtag",
709 : "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
710 : ::std::hex << nLang);
711 0 : return;
712 : }
713 : SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
714 153 : MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
715 : // Resest system locale to none and let registerImpl() do the rest to
716 : // initialize a new one.
717 153 : theSystemLocale::get().reset();
718 153 : LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
719 153 : aLanguageTag.registerImpl();
720 : }
721 :
722 222595 : static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
723 : {
724 450866 : return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
725 667687 : (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
726 : }
727 :
728 :
729 2460385 : LanguageTag::ImplPtr LanguageTag::registerImpl() const
730 : {
731 : // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
732 : // here as they access getImpl() and syncFromImpl() and would lead to
733 : // recursion. Also do not use the static LanguageTag::convertTo...()
734 : // methods as they may create temporary LanguageTag instances. Only
735 : // LanguageTagImpl::convertToBcp47(Locale) is ok.
736 :
737 2460385 : ImplPtr pImpl;
738 :
739 : #if OSL_DEBUG_LEVEL > 0
740 : static size_t nCalls = 0;
741 : ++nCalls;
742 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
743 : #endif
744 :
745 : // Do not register unresolved system locale, also force LangID if system
746 : // and take the system locale shortcut if possible.
747 2460385 : if (mbSystemLocale)
748 : {
749 201999 : pImpl = theSystemLocale::get();
750 201999 : if (pImpl)
751 : {
752 : #if OSL_DEBUG_LEVEL > 0
753 : static size_t nCallsSystem = 0;
754 : ++nCallsSystem;
755 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
756 : #endif
757 201802 : return pImpl;
758 : }
759 197 : if (!mbInitializedLangID)
760 : {
761 197 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
762 197 : mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
763 : SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
764 : }
765 : }
766 :
767 2258583 : if (mbInitializedLangID)
768 : {
769 1402007 : if (mnLangID == LANGUAGE_DONTKNOW)
770 : {
771 : // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
772 : // conversion attempts. At the same time provide a central breakpoint
773 : // to inspect such places.
774 623870 : LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
775 623870 : if (!rDontKnow)
776 131 : rDontKnow.reset( new LanguageTagImpl( *this));
777 623870 : pImpl = rDontKnow;
778 : #if OSL_DEBUG_LEVEL > 0
779 : static size_t nCallsDontKnow = 0;
780 : ++nCallsDontKnow;
781 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
782 : #endif
783 623870 : return pImpl;
784 : }
785 : else
786 : {
787 : // A great share are calls for a system equal locale.
788 778137 : pImpl = theSystemLocale::get();
789 778137 : if (pImpl && pImpl->mnLangID == mnLangID)
790 : {
791 : #if OSL_DEBUG_LEVEL > 0
792 : static size_t nCallsSystemEqual = 0;
793 : ++nCallsSystemEqual;
794 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
795 : << " system equal LangID calls");
796 : #endif
797 458831 : return pImpl;
798 : }
799 : }
800 : }
801 :
802 : // Force Bcp47 if not LangID.
803 1175882 : if (!mbInitializedLangID && !mbInitializedBcp47 && mbInitializedLocale)
804 : {
805 530888 : maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
806 530888 : mbInitializedBcp47 = !maBcp47.isEmpty();
807 : }
808 :
809 1175882 : if (mbInitializedBcp47)
810 : {
811 : // A great share are calls for a system equal locale.
812 856651 : pImpl = theSystemLocale::get();
813 856651 : if (pImpl && pImpl->maBcp47 == maBcp47)
814 : {
815 : #if OSL_DEBUG_LEVEL > 0
816 : static size_t nCallsSystemEqual = 0;
817 : ++nCallsSystemEqual;
818 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
819 : #endif
820 314739 : return pImpl;
821 : }
822 : }
823 :
824 : #if OSL_DEBUG_LEVEL > 0
825 : static size_t nCallsNonSystem = 0;
826 : ++nCallsNonSystem;
827 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
828 : #endif
829 :
830 1722286 : osl::MutexGuard aGuard( theMutex::get());
831 :
832 : #if OSL_DEBUG_LEVEL > 0
833 : static long nRunning = 0;
834 : // Entering twice here is ok, which is needed for fallback init in
835 : // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
836 : // everything else is suspicious.
837 : SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
838 : << maBcp47 << "' 0x" << ::std::hex << mnLangID );
839 : struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
840 : #endif
841 :
842 : // Prefer LangID map as find+insert needs less comparison work.
843 861143 : if (mbInitializedLangID)
844 : {
845 319306 : MapLangID& rMap = theMapLangID::get();
846 319306 : MapLangID::const_iterator it( rMap.find( mnLangID));
847 319306 : if (it != rMap.end())
848 : {
849 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
850 316900 : pImpl = (*it).second;
851 : }
852 : else
853 : {
854 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
855 2406 : pImpl.reset( new LanguageTagImpl( *this));
856 2406 : rMap.insert( ::std::make_pair( mnLangID, pImpl));
857 : // Try round-trip.
858 2406 : if (!pImpl->mbInitializedLocale)
859 2401 : pImpl->convertLangToLocale();
860 2406 : LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
861 : // If round-trip is identical cross-insert to Bcp47 map.
862 2406 : if (nLang == pImpl->mnLangID)
863 : {
864 2261 : if (!pImpl->mbInitializedBcp47)
865 2256 : pImpl->convertLocaleToBcp47();
866 : ::std::pair< MapBcp47::const_iterator, bool > res(
867 2261 : theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
868 2261 : if (res.second)
869 : {
870 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "'"
871 : << " for 0x" << ::std::hex << mnLangID);
872 : }
873 : else
874 : {
875 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "'"
876 : << " for 0x" << ::std::hex << mnLangID << " have 0x"
877 : << ::std::hex << (*res.first).second->mnLangID);
878 : }
879 : }
880 : else
881 : {
882 145 : if (!pImpl->mbInitializedBcp47)
883 145 : pImpl->convertLocaleToBcp47();
884 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "'"
885 : << " for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
886 : }
887 : }
888 : }
889 541837 : else if (!maBcp47.isEmpty())
890 : {
891 541837 : MapBcp47& rMap = theMapBcp47::get();
892 541837 : MapBcp47::const_iterator it( rMap.find( maBcp47));
893 541837 : if (it != rMap.end())
894 : {
895 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
896 301612 : pImpl = (*it).second;
897 : }
898 : else
899 : {
900 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
901 240225 : pImpl.reset( new LanguageTagImpl( *this));
902 240225 : ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
903 : // If changed after canonicalize() also add the resulting tag to
904 : // the map.
905 240225 : if (pImpl->synCanonicalize())
906 : {
907 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
908 : ::std::pair< MapBcp47::const_iterator, bool > insCanon(
909 24424 : rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
910 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
911 : << "inserted '" << pImpl->maBcp47 << "'");
912 : // If the canonicalized tag already existed (was not inserted)
913 : // and impls are different, make this impl that impl and skip
914 : // the rest if that LangID is present as well. The existing
915 : // entry may or may not be different, it may even be strictly
916 : // identical to this if it differs only in case (e.g. ko-kr =>
917 : // ko-KR) which was corrected in canonicalize() hence also in
918 : // the map entry but comparison is case insensitive and found
919 : // it again.
920 24424 : if (!insCanon.second && (*insCanon.first).second != pImpl)
921 : {
922 17630 : (*insOrig.first).second = pImpl = (*insCanon.first).second;
923 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
924 : << ::std::hex << pImpl->mnLangID);
925 : }
926 : }
927 240225 : if (!pImpl->mbInitializedLangID)
928 : {
929 : // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
930 222595 : if (!pImpl->mbInitializedLocale)
931 222521 : pImpl->convertBcp47ToLocale();
932 222595 : if (!pImpl->mbInitializedLangID)
933 222595 : pImpl->convertLocaleToLang( true);
934 : // Unconditionally insert (round-trip is possible) for
935 : // on-the-fly IDs and (generated or not) suggested IDs.
936 222595 : bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
937 222595 : OUString aBcp47;
938 222595 : if (!bInsert)
939 : {
940 216917 : if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
941 : {
942 : // May have involved canonicalize(), so compare with
943 : // pImpl->maBcp47 instead of maBcp47!
944 433830 : aBcp47 = LanguageTagImpl::convertToBcp47(
945 433830 : MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
946 216915 : bInsert = (aBcp47 == pImpl->maBcp47);
947 : }
948 : }
949 : // If round-trip is identical cross-insert to Bcp47 map.
950 222595 : if (bInsert)
951 : {
952 : ::std::pair< MapLangID::const_iterator, bool > res(
953 209702 : theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
954 209702 : if (res.second)
955 : {
956 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
957 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
958 : }
959 : else
960 : {
961 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
962 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
963 : << (*res.first).second->maBcp47 << "'");
964 : }
965 : }
966 : else
967 : {
968 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
969 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
970 : << aBcp47 << "'");
971 222595 : }
972 : }
973 : }
974 : }
975 : else
976 : {
977 : SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
978 0 : pImpl.reset( new LanguageTagImpl( *this));
979 : }
980 :
981 : // If we reach here for mbSystemLocale we didn't have theSystemLocale
982 : // above, so add it.
983 861143 : if (mbSystemLocale && mbInitializedLangID)
984 : {
985 197 : theSystemLocale::get() = pImpl;
986 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
987 : << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
988 : }
989 :
990 861143 : return pImpl;
991 : }
992 :
993 :
994 3981056 : LanguageTag::ImplPtr LanguageTag::getImpl() const
995 : {
996 3981056 : if (!mpImpl)
997 : {
998 2460157 : mpImpl = registerImpl();
999 2460157 : syncVarsFromRawImpl();
1000 : }
1001 3981056 : return mpImpl;
1002 : }
1003 :
1004 :
1005 2814232 : void LanguageTag::resetVars()
1006 : {
1007 2814232 : mpImpl.reset();
1008 2814232 : maLocale = lang::Locale();
1009 2814232 : maBcp47 = OUString();
1010 2814232 : mnLangID = LANGUAGE_SYSTEM;
1011 2814232 : mbSystemLocale = true;
1012 2814232 : mbInitializedBcp47 = false;
1013 2814232 : mbInitializedLocale = false;
1014 2814232 : mbInitializedLangID = false;
1015 2814232 : mbIsFallback = false;
1016 2814232 : }
1017 :
1018 :
1019 373 : LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
1020 : {
1021 373 : resetVars();
1022 373 : maBcp47 = rBcp47LanguageTag;
1023 373 : mbSystemLocale = rBcp47LanguageTag.isEmpty();
1024 373 : mbInitializedBcp47 = !mbSystemLocale;
1025 :
1026 373 : if (bCanonicalize)
1027 : {
1028 0 : getImpl()->canonicalize();
1029 : // Registration itself may already have canonicalized, so do an
1030 : // unconditional sync.
1031 0 : syncFromImpl();
1032 : }
1033 373 : return *this;
1034 : }
1035 :
1036 :
1037 110 : LanguageTag & LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
1038 : {
1039 110 : resetVars();
1040 110 : maLocale = rLocale;
1041 110 : mbSystemLocale = rLocale.Language.isEmpty();
1042 110 : mbInitializedLocale = !mbSystemLocale;
1043 110 : handleVendorVariant( maLocale);
1044 110 : return *this;
1045 : }
1046 :
1047 :
1048 2813749 : LanguageTag & LanguageTag::reset( LanguageType nLanguage )
1049 : {
1050 2813749 : resetVars();
1051 2813749 : mnLangID = nLanguage;
1052 2813749 : mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1053 2813749 : mbInitializedLangID = !mbSystemLocale;
1054 2813749 : return *this;
1055 : }
1056 :
1057 :
// Canonicalizes maBcp47 and decides whether liblangtag is needed for it.
//
// First tries to classify the tag without liblangtag (simple ll[l][-Ssss][-CC]
// style tags, known locales, known fallbacks, internal overrides). Only if
// that does not settle the matter the tag is parsed and canonicalized by
// liblangtag.
//
// Members written here: maBcp47, meIsLiblangtagNeeded, meIsValid, and, when
// the string changed, meIsIsoLocale / meIsIsoODF (reset to
// DECISION_DONTKNOW). mpImplLangtag is created on demand. maLocale and
// mnLangID may be filled in temporarily but are rolled back if they were only
// needed for the checks.
//
// @return true if maBcp47 was changed, else false.
bool LanguageTagImpl::canonicalize()
{
#ifdef erDEBUG
    // dump once
    struct dumper
    {
        lt_tag_t** mpp;
        dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
        ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
    };
    dumper aDumper( &mpImplLangtag);
#endif

    bool bChanged = false;

    // Side effect: have maBcp47 in any case, resolved system.
    // Some methods calling canonicalize() (or not calling it due to
    // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
    // meIsLiblangtagNeeded anywhere else than hereafter.
    getBcp47();

    // The simple cases and known locales don't need liblangtag processing,
    // which also avoids loading liblangtag data on startup.
    if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
    {
        // Set when maLocale respectively mnLangID are populated only to run
        // the checks in this branch; they are cleared again at its end.
        bool bTemporaryLocale = false;
        bool bTemporaryLangID = false;
        if (!mbInitializedLocale && !mbInitializedLangID)
        {
            if (mbSystemLocale)
            {
                mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
                mbInitializedLangID = true;
            }
            else
            {
                // Now this is getting funny.. we only have some BCP47 string
                // and want to determine if parsing it would be possible
                // without using liblangtag just to see if it is a simple known
                // locale or could fall back to one.
                OUString aLanguage, aScript, aCountry, aVariants;
                Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
                if (eExt != EXTRACTED_NONE)
                {
                    if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
                    {
                        // Rebuild bcp47 with proper casing of tags.
                        OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
                                1 + aCountry.getLength() + 1 + aVariants.getLength());
                        aBuf.append( aLanguage);
                        if (!aScript.isEmpty())
                            aBuf.append("-" + aScript);
                        if (!aCountry.isEmpty())
                            aBuf.append("-" + aCountry);
                        if (!aVariants.isEmpty())
                            aBuf.append("-" + aVariants);
                        OUString aStr( aBuf.makeStringAndClear());

                        if (maBcp47 != aStr)
                        {
                            maBcp47 = aStr;
                            bChanged = true;
                        }
                    }
                    // Language[-Country] without script maps directly to a
                    // Locale; everything else goes into the Variant of a
                    // I18NLANGTAG_QLT locale.
                    if (eExt == EXTRACTED_LSC && aScript.isEmpty())
                    {
                        maLocale.Language = aLanguage;
                        maLocale.Country = aCountry;
                    }
                    else
                    {
                        maLocale.Language = I18NLANGTAG_QLT;
                        maLocale.Country = aCountry;
                        maLocale.Variant = maBcp47;
                    }
                    bTemporaryLocale = mbInitializedLocale = true;
                }
            }
        }
        if (mbInitializedLangID && !mbInitializedLocale)
        {
            // Do not call getLocale() here because that prefers
            // convertBcp47ToLocale() which would end up in recursion via
            // isIsoLocale()!

            // Prepare to verify that we have a known locale, not just an
            // arbitrary MS-LangID.
            convertLangToLocale();
        }
        if (mbInitializedLocale)
        {
            if (maLocale.Variant.isEmpty())
                meIsLiblangtagNeeded = DECISION_NO;     // per definition ll[l][-CC]
            else
            {
                if (!mbInitializedLangID)
                {
                    convertLocaleToLang( false);
                    if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
                        bTemporaryLangID = true;
                }
                if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
                    meIsLiblangtagNeeded = DECISION_NO; // known locale
                else
                {
                    const KnownTagSet& rKnowns = getKnowns();
                    if (rKnowns.find( maBcp47) != rKnowns.end())
                        meIsLiblangtagNeeded = DECISION_NO; // known fallback
                }
            }
            // We may have an internal override "canonicalization".
            lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
            if (!aNew.Language.isEmpty() &&
                    (aNew.Language != maLocale.Language ||
                     aNew.Country  != maLocale.Country ||
                     aNew.Variant  != maLocale.Variant))
            {
                maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
                bChanged = true;
                // The string changed, so earlier ISO decisions no longer hold.
                meIsIsoLocale = DECISION_DONTKNOW;
                meIsIsoODF = DECISION_DONTKNOW;
                meIsLiblangtagNeeded = DECISION_NO; // known locale
            }
        }
        // Roll back whatever was filled in only for the checks above.
        if (bTemporaryLocale)
        {
            mbInitializedLocale = false;
            maLocale = lang::Locale();
        }
        if (bTemporaryLangID)
        {
            mbInitializedLangID = false;
            mnLangID = LANGUAGE_DONTKNOW;
        }
    }
    if (meIsLiblangtagNeeded == DECISION_NO)
    {
        meIsValid = DECISION_YES;   // really, known must be valid ...
        return bChanged;            // that's it
    }

    // From here on liblangtag does the work.
    meIsLiblangtagNeeded = DECISION_YES;
    SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");

    if (!mpImplLangtag)
    {
        // NOTE(review): incRef() presumably reference-counts the liblangtag
        // data kept in theDataRef - confirm against its definition.
        theDataRef::get().incRef();
        mpImplLangtag = lt_tag_new();
    }

    // Unrefs a possibly returned lt_error_t when leaving the function.
    myLtError aError;

    if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    {
        char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
        SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
        if (pTag)
        {
            OUString aNew( OUString::createFromAscii( pTag));
            // Make the lt_tag_t follow the new string if different, which
            // removes default script and such.
            if (maBcp47 != aNew)
            {
                maBcp47 = aNew;
                bChanged = true;
                meIsIsoLocale = DECISION_DONTKNOW;
                meIsIsoODF = DECISION_DONTKNOW;
                if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
                {
                    SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
                    // pTag is released with free() on every path out of here.
                    free( pTag);
                    meIsValid = DECISION_NO;
                    return bChanged;
                }
            }
            free( pTag);
            meIsValid = DECISION_YES;
            return bChanged;
        }
    }
    else
    {
        SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
    }
    // Reached if parsing failed or canonicalization returned no string.
    meIsValid = DECISION_NO;
    return bChanged;
}
1245 :
1246 :
1247 1285497 : bool LanguageTagImpl::synCanonicalize()
1248 : {
1249 1285497 : bool bChanged = false;
1250 1285497 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
1251 : {
1252 242681 : bChanged = canonicalize();
1253 242681 : if (bChanged)
1254 : {
1255 24499 : if (mbInitializedLocale)
1256 75 : convertBcp47ToLocale();
1257 24499 : if (mbInitializedLangID)
1258 75 : convertBcp47ToLang();
1259 : }
1260 : }
1261 1285497 : return bChanged;
1262 : }
1263 :
1264 :
1265 423001 : void LanguageTag::syncFromImpl()
1266 : {
1267 423001 : ImplPtr xImpl = getImpl();
1268 423001 : LanguageTagImpl* pImpl = xImpl.get();
1269 423076 : bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1270 845927 : (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1271 : SAL_INFO_IF( bRegister, "i18nlangtag",
1272 : "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1273 : " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1274 423001 : syncVarsFromRawImpl();
1275 423001 : if (bRegister)
1276 75 : mpImpl = registerImpl();
1277 423001 : }
1278 :
1279 :
1280 1244538 : void LanguageTag::syncVarsFromImpl() const
1281 : {
1282 1244538 : if (!mpImpl)
1283 1244538 : getImpl(); // with side effect syncVarsFromRawImpl()
1284 : else
1285 0 : syncVarsFromRawImpl();
1286 1244538 : }
1287 :
1288 :
1289 2883158 : void LanguageTag::syncVarsFromRawImpl() const
1290 : {
1291 : // Do not use getImpl() here.
1292 2883158 : LanguageTagImpl* pImpl = mpImpl.get();
1293 2883158 : if (!pImpl)
1294 2883158 : return;
1295 :
1296 : // Obviously only mutable variables.
1297 2883158 : mbInitializedBcp47 = pImpl->mbInitializedBcp47;
1298 2883158 : maBcp47 = pImpl->maBcp47;
1299 2883158 : mbInitializedLocale = pImpl->mbInitializedLocale;
1300 2883158 : maLocale = pImpl->maLocale;
1301 2883158 : mbInitializedLangID = pImpl->mbInitializedLangID;
1302 2883158 : mnLangID = pImpl->mnLangID;
1303 : }
1304 :
1305 :
1306 0 : bool LanguageTag::synCanonicalize()
1307 : {
1308 0 : bool bChanged = getImpl()->synCanonicalize();
1309 0 : if (bChanged)
1310 0 : syncFromImpl();
1311 0 : return bChanged;
1312 : }
1313 :
1314 :
1315 2532 : void LanguageTagImpl::convertLocaleToBcp47()
1316 : {
1317 2532 : if (mbSystemLocale && !mbInitializedLocale)
1318 0 : convertLangToLocale();
1319 :
1320 2532 : if (maLocale.Language.isEmpty())
1321 : {
1322 : // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1323 : // locale via LanguageTag::convertToBcp47(LanguageType) and
1324 : // LanguageTag::convertToLocale(LanguageType) would instanciate another
1325 : // LanguageTag.
1326 0 : maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, true);
1327 : }
1328 2532 : if (maLocale.Language.isEmpty())
1329 : {
1330 0 : maBcp47 = OUString(); // bad luck
1331 : }
1332 2532 : else if (maLocale.Language == I18NLANGTAG_QLT)
1333 : {
1334 384 : maBcp47 = maLocale.Variant;
1335 384 : meIsIsoLocale = DECISION_NO;
1336 : }
1337 : else
1338 : {
1339 2148 : maBcp47 = LanguageTag::convertToBcp47( maLocale, true);
1340 : }
1341 2532 : mbInitializedBcp47 = true;
1342 2532 : }
1343 :
1344 :
1345 254411 : void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1346 : {
1347 254411 : if (mbSystemLocale)
1348 : {
1349 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1350 : }
1351 : else
1352 : {
1353 254411 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
1354 254411 : if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1355 : {
1356 3532 : if (isValidBcp47())
1357 : {
1358 : // For language-only (including script) look if we know some
1359 : // locale of that language and if so try to use the primary
1360 : // language ID of that instead of generating an on-the-fly ID.
1361 3530 : if (getCountry().isEmpty() && isIsoODF())
1362 : {
1363 3439 : lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
1364 : // 'en-US' is last resort, do not use except when looking
1365 : // for 'en'.
1366 3439 : if (aLoc.Language != "en" || getLanguage() == "en")
1367 : {
1368 3437 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
1369 3437 : if (mnLangID != LANGUAGE_DONTKNOW)
1370 3437 : mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
1371 3439 : }
1372 : }
1373 3530 : registerOnTheFly( mnLangID);
1374 : }
1375 : else
1376 : {
1377 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1378 : << maBcp47 << "'");
1379 : }
1380 : }
1381 : }
1382 254411 : mbInitializedLangID = true;
1383 254411 : }
1384 :
1385 :
1386 0 : void LanguageTag::convertLocaleToLang()
1387 : {
1388 0 : getImpl()->convertLocaleToLang( true);
1389 0 : syncFromImpl();
1390 0 : }
1391 :
1392 :
1393 222596 : void LanguageTagImpl::convertBcp47ToLocale()
1394 : {
1395 222596 : bool bIso = isIsoLocale();
1396 222596 : if (bIso)
1397 : {
1398 196713 : maLocale.Language = getLanguageFromLangtag();
1399 196713 : maLocale.Country = getRegionFromLangtag();
1400 196713 : maLocale.Variant = OUString();
1401 : }
1402 : else
1403 : {
1404 25883 : maLocale.Language = I18NLANGTAG_QLT;
1405 25883 : maLocale.Country = getCountry();
1406 25883 : maLocale.Variant = maBcp47;
1407 : }
1408 222596 : mbInitializedLocale = true;
1409 222596 : }
1410 :
1411 :
1412 0 : void LanguageTag::convertBcp47ToLocale()
1413 : {
1414 0 : getImpl()->convertBcp47ToLocale();
1415 0 : syncFromImpl();
1416 0 : }
1417 :
1418 :
1419 75 : void LanguageTagImpl::convertBcp47ToLang()
1420 : {
1421 75 : if (mbSystemLocale)
1422 : {
1423 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1424 : }
1425 : else
1426 : {
1427 75 : if (!mbInitializedLocale)
1428 0 : convertBcp47ToLocale();
1429 75 : convertLocaleToLang( true);
1430 : }
1431 75 : mbInitializedLangID = true;
1432 75 : }
1433 :
1434 :
1435 0 : void LanguageTag::convertBcp47ToLang()
1436 : {
1437 0 : getImpl()->convertBcp47ToLang();
1438 0 : syncFromImpl();
1439 0 : }
1440 :
1441 :
1442 2532 : void LanguageTagImpl::convertLangToLocale()
1443 : {
1444 2532 : if (mbSystemLocale && !mbInitializedLangID)
1445 : {
1446 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1447 0 : mbInitializedLangID = true;
1448 : }
1449 : // Resolve system here! The original is remembered as mbSystemLocale.
1450 2532 : maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
1451 2532 : mbInitializedLocale = true;
1452 2532 : }
1453 :
1454 :
1455 0 : void LanguageTag::convertLangToLocale()
1456 : {
1457 0 : getImpl()->convertLangToLocale();
1458 0 : syncFromImpl();
1459 0 : }
1460 :
1461 :
1462 131 : void LanguageTagImpl::convertLangToBcp47()
1463 : {
1464 131 : if (!mbInitializedLocale)
1465 131 : convertLangToLocale();
1466 131 : convertLocaleToBcp47();
1467 131 : mbInitializedBcp47 = true;
1468 131 : }
1469 :
1470 :
1471 137 : void LanguageTag::convertFromRtlLocale()
1472 : {
1473 : // The rtl_Locale follows the Open Group Base Specification,
1474 : // 8.2 Internationalization Variables
1475 : // language[_territory][.codeset][@modifier]
1476 : // On GNU/Linux systems usually being glibc locales.
1477 : // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1478 : // Language: language 2 or 3 alpha code
1479 : // Country: [territory] 2 alpha code
1480 : // Variant: [.codeset][@modifier]
1481 : // Variant effectively contains anything that follows the territory, not
1482 : // looking for '.' dot delimiter or '@' modifier content.
1483 137 : if (!maLocale.Variant.isEmpty())
1484 : {
1485 274 : OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
1486 137 : RTL_TEXTENCODING_UTF8);
1487 : /* FIXME: let liblangtag parse this entirely with
1488 : * lt_tag_convert_from_locale() but that needs a patch to pass the
1489 : * string. */
1490 : #if 0
1491 : myLtError aError;
1492 : theDataRef::get().incRef();
1493 : mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1494 : maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1495 : mbInitializedBcp47 = true;
1496 : #else
1497 137 : mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
1498 137 : if (mnLangID == LANGUAGE_DONTKNOW)
1499 : {
1500 : SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1501 0 : mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1502 : }
1503 137 : mbInitializedLangID = true;
1504 : #endif
1505 137 : maLocale = lang::Locale();
1506 137 : mbInitializedLocale = false;
1507 : }
1508 137 : }
1509 :
1510 :
1511 249118 : const OUString & LanguageTagImpl::getBcp47() const
1512 : {
1513 249118 : if (!mbInitializedBcp47)
1514 : {
1515 131 : if (mbInitializedLocale)
1516 0 : const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
1517 : else
1518 131 : const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
1519 : }
1520 249118 : return maBcp47;
1521 : }
1522 :
1523 :
1524 256591 : const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1525 : {
1526 256591 : if (!bResolveSystem && mbSystemLocale)
1527 3826 : return theEmptyBcp47::get();
1528 252765 : if (!mbInitializedBcp47)
1529 200549 : syncVarsFromImpl();
1530 252765 : if (!mbInitializedBcp47)
1531 : {
1532 125 : getImpl()->getBcp47();
1533 125 : const_cast<LanguageTag*>(this)->syncFromImpl();
1534 : }
1535 252765 : return maBcp47;
1536 : }
1537 :
1538 :
1539 395926 : OUString LanguageTagImpl::getLanguageFromLangtag()
1540 : {
1541 395926 : OUString aLanguage;
1542 395926 : synCanonicalize();
1543 395926 : if (maBcp47.isEmpty())
1544 0 : return aLanguage;
1545 395926 : if (mpImplLangtag)
1546 : {
1547 10 : const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1548 : SAL_WARN_IF( !pLangT, "i18nlangtag",
1549 : "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1550 10 : if (!pLangT)
1551 5 : return aLanguage;
1552 5 : const char* pLang = lt_lang_get_tag( pLangT);
1553 : SAL_WARN_IF( !pLang, "i18nlangtag",
1554 : "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1555 5 : if (pLang)
1556 5 : aLanguage = OUString::createFromAscii( pLang);
1557 : }
1558 : else
1559 : {
1560 395916 : if (mbCachedLanguage || cacheSimpleLSCV())
1561 395914 : aLanguage = maCachedLanguage;
1562 : }
1563 395921 : return aLanguage;
1564 : }
1565 :
1566 :
1567 9 : OUString LanguageTagImpl::getScriptFromLangtag()
1568 : {
1569 9 : OUString aScript;
1570 9 : synCanonicalize();
1571 9 : if (maBcp47.isEmpty())
1572 0 : return aScript;
1573 9 : if (mpImplLangtag)
1574 : {
1575 8 : const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1576 : // pScriptT==NULL is valid for default scripts
1577 8 : if (!pScriptT)
1578 8 : return aScript;
1579 0 : const char* pScript = lt_script_get_tag( pScriptT);
1580 : SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1581 0 : if (pScript)
1582 0 : aScript = OUString::createFromAscii( pScript);
1583 : }
1584 : else
1585 : {
1586 1 : if (mbCachedScript || cacheSimpleLSCV())
1587 1 : aScript = maCachedScript;
1588 : }
1589 1 : return aScript;
1590 : }
1591 :
1592 :
1593 421297 : OUString LanguageTagImpl::getRegionFromLangtag()
1594 : {
1595 421297 : OUString aRegion;
1596 421297 : synCanonicalize();
1597 421297 : if (maBcp47.isEmpty())
1598 0 : return aRegion;
1599 421297 : if (mpImplLangtag)
1600 : {
1601 14 : const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1602 : // pRegionT==NULL is valid for language only tags, rough check here
1603 : // that does not take sophisticated tags into account that actually
1604 : // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1605 : // that ll-CC and lll-CC actually fail.
1606 : SAL_WARN_IF( !pRegionT &&
1607 : maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1608 : maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1609 : "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1610 14 : if (!pRegionT)
1611 14 : return aRegion;
1612 0 : const char* pRegion = lt_region_get_tag( pRegionT);
1613 : SAL_WARN_IF( !pRegion, "i18nlangtag",
1614 : "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1615 0 : if (pRegion)
1616 0 : aRegion = OUString::createFromAscii( pRegion);
1617 : }
1618 : else
1619 : {
1620 421283 : if (mbCachedCountry || cacheSimpleLSCV())
1621 421280 : aRegion = maCachedCountry;
1622 : }
1623 421283 : return aRegion;
1624 : }
1625 :
1626 :
1627 1 : OUString LanguageTagImpl::getVariantsFromLangtag()
1628 : {
1629 1 : OUString aVariants;
1630 1 : synCanonicalize();
1631 1 : if (maBcp47.isEmpty())
1632 0 : return aVariants;
1633 1 : if (mpImplLangtag)
1634 : {
1635 1 : const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1636 2 : for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1637 : {
1638 1 : const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1639 1 : if (pVariantT)
1640 : {
1641 1 : const char* p = lt_variant_get_tag( pVariantT);
1642 1 : if (p)
1643 : {
1644 1 : if (aVariants.isEmpty())
1645 1 : aVariants = OUString::createFromAscii( p);
1646 : else
1647 0 : aVariants += "-" + OUString::createFromAscii( p);
1648 : }
1649 : }
1650 : }
1651 : }
1652 : else
1653 : {
1654 0 : if (mbCachedVariants || cacheSimpleLSCV())
1655 0 : aVariants = maCachedVariants;
1656 : }
1657 1 : return aVariants;
1658 : }
1659 :
1660 :
1661 2452348 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1662 : {
1663 2452348 : if (!bResolveSystem && mbSystemLocale)
1664 276 : return theEmptyLocale::get();
1665 2452072 : if (!mbInitializedLocale)
1666 547446 : syncVarsFromImpl();
1667 2452072 : if (!mbInitializedLocale)
1668 : {
1669 0 : if (mbInitializedBcp47)
1670 0 : const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1671 : else
1672 0 : const_cast<LanguageTag*>(this)->convertLangToLocale();
1673 : }
1674 2452072 : return maLocale;
1675 : }
1676 :
1677 :
1678 10150540 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1679 : {
1680 10150540 : if (!bResolveSystem && mbSystemLocale)
1681 1890126 : return LANGUAGE_SYSTEM;
1682 8260414 : if (!mbInitializedLangID)
1683 496543 : syncVarsFromImpl();
1684 8260414 : if (!mbInitializedLangID)
1685 : {
1686 0 : if (mbInitializedBcp47)
1687 0 : const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1688 : else
1689 : {
1690 0 : const_cast<LanguageTag*>(this)->convertLocaleToLang();
1691 :
1692 : /* Resolve a locale only unknown due to some redundant information,
1693 : * like 'de-Latn-DE' with script tag. Never call canonicalize()
1694 : * from within convert...() methods due to possible recursion, so
1695 : * do it here. */
1696 0 : if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
1697 0 : const_cast<LanguageTag*>(this)->synCanonicalize();
1698 : }
1699 : }
1700 8260414 : return mnLangID;
1701 : }
1702 :
1703 :
1704 0 : void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1705 : {
1706 : // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1707 : // and getCountry() to work correctly in this context.
1708 0 : if (isIsoODF())
1709 : {
1710 0 : rLanguage = getLanguage();
1711 0 : rScript = getScript();
1712 0 : rCountry = getCountry();
1713 : }
1714 : else
1715 : {
1716 0 : rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1717 0 : rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1718 0 : rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1719 : }
1720 0 : }
1721 :
1722 :
1723 : namespace
1724 : {
1725 :
1726 459452 : inline bool isLowerAscii( sal_Unicode c )
1727 : {
1728 459452 : return 'a' <= c && c <= 'z';
1729 : }
1730 :
1731 394854 : inline bool isUpperAscii( sal_Unicode c )
1732 : {
1733 394854 : return 'A' <= c && c <= 'Z';
1734 : }
1735 :
1736 : }
1737 :
1738 :
1739 : // static
1740 198705 : bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1741 : {
1742 : /* TODO: ignore case? For now let's see where rubbish is used. */
1743 : bool b2chars;
1744 658167 : if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
1745 794787 : isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1746 62052 : (b2chars || isLowerAscii( rLanguage[2])))
1747 198694 : return true;
1748 : SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1749 : (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1750 : (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1751 : "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1752 11 : return false;
1753 : }
1754 :
1755 :
1756 : // static
1757 224584 : bool LanguageTag::isIsoCountry( const OUString& rRegion )
1758 : {
1759 : /* TODO: ignore case? For now let's see where rubbish is used. */
1760 646593 : if (rRegion.isEmpty() ||
1761 394850 : (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1762 224584 : return true;
1763 : SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1764 : "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1765 0 : return false;
1766 : }
1767 :
1768 :
1769 : // static
1770 3472 : bool LanguageTag::isIsoScript( const OUString& rScript )
1771 : {
1772 : /* TODO: ignore case? For now let's see where rubbish is used. */
1773 6948 : if (rScript.isEmpty() ||
1774 8 : (rScript.getLength() == 4 &&
1775 12 : isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1776 8 : isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1777 3472 : return true;
1778 : SAL_WARN_IF( rScript.getLength() == 4 &&
1779 : (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1780 : isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1781 : "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1782 0 : return false;
1783 : }
1784 :
1785 :
1786 201022 : OUString LanguageTagImpl::getLanguage() const
1787 : {
1788 201022 : if (!mbCachedLanguage)
1789 : {
1790 199213 : maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
1791 199213 : mbCachedLanguage = true;
1792 : }
1793 201022 : return maCachedLanguage;
1794 : }
1795 :
1796 :
1797 1383130 : OUString LanguageTag::getLanguage() const
1798 : {
1799 1383130 : ImplPtr pImpl = getImpl();
1800 1383130 : if (pImpl->mbCachedLanguage)
1801 1380815 : return pImpl->maCachedLanguage;
1802 4630 : OUString aRet( pImpl->getLanguage());
1803 2315 : const_cast<LanguageTag*>(this)->syncFromImpl();
1804 1385445 : return aRet;
1805 : }
1806 :
1807 :
1808 3472 : OUString LanguageTagImpl::getScript() const
1809 : {
1810 3472 : if (!mbCachedScript)
1811 : {
1812 9 : maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1813 9 : mbCachedScript = true;
1814 : }
1815 3472 : return maCachedScript;
1816 : }
1817 :
1818 :
1819 49555 : OUString LanguageTag::getScript() const
1820 : {
1821 49555 : ImplPtr pImpl = getImpl();
1822 49555 : if (pImpl->mbCachedScript)
1823 49555 : return pImpl->maCachedScript;
1824 0 : OUString aRet( pImpl->getScript());
1825 0 : const_cast<LanguageTag*>(this)->syncFromImpl();
1826 49555 : return aRet;
1827 : }
1828 :
1829 :
1830 11641 : OUString LanguageTag::getLanguageAndScript() const
1831 : {
1832 11641 : OUString aLanguageScript( getLanguage());
1833 23282 : OUString aScript( getScript());
1834 11641 : if (!aScript.isEmpty())
1835 : {
1836 3 : aLanguageScript += "-" + aScript;
1837 : }
1838 23282 : return aLanguageScript;
1839 : }
1840 :
1841 :
1842 29418 : OUString LanguageTagImpl::getCountry() const
1843 : {
1844 29418 : if (!mbCachedCountry)
1845 : {
1846 25890 : maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1847 25890 : if (!LanguageTag::isIsoCountry( maCachedCountry))
1848 0 : maCachedCountry = OUString();
1849 25890 : mbCachedCountry = true;
1850 : }
1851 29418 : return maCachedCountry;
1852 : }
1853 :
1854 :
1855 419959 : OUString LanguageTag::getCountry() const
1856 : {
1857 419959 : ImplPtr pImpl = getImpl();
1858 419959 : if (pImpl->mbCachedCountry)
1859 419954 : return pImpl->maCachedCountry;
1860 10 : OUString aRet( pImpl->getCountry());
1861 5 : const_cast<LanguageTag*>(this)->syncFromImpl();
1862 419964 : return aRet;
1863 : }
1864 :
1865 :
1866 198694 : OUString LanguageTagImpl::getRegion() const
1867 : {
1868 198694 : return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1869 : }
1870 :
1871 :
1872 5 : OUString LanguageTagImpl::getVariants() const
1873 : {
1874 5 : if (!mbCachedVariants)
1875 : {
1876 1 : maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
1877 1 : mbCachedVariants = true;
1878 : }
1879 5 : return maCachedVariants;
1880 : }
1881 :
1882 :
1883 40192 : OUString LanguageTag::getVariants() const
1884 : {
1885 40192 : ImplPtr pImpl = getImpl();
1886 40192 : if (pImpl->mbCachedVariants)
1887 40192 : return pImpl->maCachedVariants;
1888 0 : OUString aRet( pImpl->getVariants());
1889 0 : const_cast<LanguageTag*>(this)->syncFromImpl();
1890 40192 : return aRet;
1891 : }
1892 :
1893 :
1894 0 : OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
1895 : {
1896 0 : OUString aRet;
1897 0 : if (isIsoLocale())
1898 : {
1899 0 : OUString aCountry( getCountry());
1900 0 : if (aCountry.isEmpty())
1901 0 : aRet = getLanguage() + rEncoding;
1902 : else
1903 0 : aRet = getLanguage() + "_" + aCountry + rEncoding;
1904 : }
1905 : else
1906 : {
1907 : /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
1908 : * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
1909 : * So far no code was prepared for anything else than a simple
1910 : * language_country locale so we don't loose anything here right now.
1911 : * */
1912 : }
1913 0 : return aRet;
1914 : }
1915 :
1916 :
1917 40486 : bool LanguageTagImpl::hasScript() const
1918 : {
1919 40486 : if (!mbCachedScript)
1920 0 : getScript();
1921 40486 : return !maCachedScript.isEmpty();
1922 : }
1923 :
1924 :
1925 40486 : bool LanguageTag::hasScript() const
1926 : {
1927 40486 : bool bRet = getImpl()->hasScript();
1928 40486 : const_cast<LanguageTag*>(this)->syncFromImpl();
1929 40486 : return bRet;
1930 : }
1931 :
1932 :
1933 225012 : bool LanguageTagImpl::cacheSimpleLSCV()
1934 : {
1935 450024 : OUString aLanguage, aScript, aCountry, aVariants;
1936 225012 : Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
1937 225012 : bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
1938 225012 : if (bRet)
1939 : {
1940 225007 : maCachedLanguage = aLanguage;
1941 225007 : maCachedScript = aScript;
1942 225007 : maCachedCountry = aCountry;
1943 225007 : maCachedVariants = aVariants;
1944 225007 : mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
1945 : }
1946 450024 : return bRet;
1947 : }
1948 :
1949 :
1950 599062 : bool LanguageTagImpl::isIsoLocale() const
1951 : {
1952 599062 : if (meIsIsoLocale == DECISION_DONTKNOW)
1953 : {
1954 224572 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1955 : // It must be at most ll-CC or lll-CC
1956 : // Do not use getCountry() here, use getRegion() instead.
1957 449144 : meIsIsoLocale = ((maBcp47.isEmpty() ||
1958 1243912 : (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
1959 1071088 : LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
1960 : }
1961 599062 : return meIsIsoLocale == DECISION_YES;
1962 : }
1963 :
1964 :
1965 372999 : bool LanguageTag::isIsoLocale() const
1966 : {
1967 372999 : bool bRet = getImpl()->isIsoLocale();
1968 372999 : const_cast<LanguageTag*>(this)->syncFromImpl();
1969 372999 : return bRet;
1970 : }
1971 :
1972 :
1973 3687 : bool LanguageTagImpl::isIsoODF() const
1974 : {
1975 3687 : if (meIsIsoODF == DECISION_DONTKNOW)
1976 : {
1977 3467 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1978 3467 : if (!LanguageTag::isIsoScript( getScript()))
1979 0 : return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
1980 : // The usual case is lll-CC so simply check that first.
1981 3467 : if (isIsoLocale())
1982 3455 : return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
1983 : // If this is not ISO locale for which script must not exist it can
1984 : // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
1985 : // ll-vvvvvvvv
1986 54 : meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
1987 37 : LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
1988 38 : getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
1989 : }
1990 232 : return meIsIsoODF == DECISION_YES;
1991 : }
1992 :
1993 :
1994 241 : bool LanguageTag::isIsoODF() const
1995 : {
1996 241 : bool bRet = getImpl()->isIsoODF();
1997 241 : const_cast<LanguageTag*>(this)->syncFromImpl();
1998 241 : return bRet;
1999 : }
2000 :
2001 :
2002 4050 : bool LanguageTagImpl::isValidBcp47() const
2003 : {
2004 4050 : if (meIsValid == DECISION_DONTKNOW)
2005 : {
2006 0 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2007 : SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2008 : "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2009 : }
2010 4050 : return meIsValid == DECISION_YES;
2011 : }
2012 :
2013 :
2014 518 : bool LanguageTag::isValidBcp47() const
2015 : {
2016 518 : bool bRet = getImpl()->isValidBcp47();
2017 518 : const_cast<LanguageTag*>(this)->syncFromImpl();
2018 518 : return bRet;
2019 : }
2020 :
2021 :
2022 6193426 : bool LanguageTag::isSystemLocale() const
2023 : {
2024 6193426 : return mbSystemLocale;
2025 : }
2026 :
2027 :
2028 1683 : LanguageTag & LanguageTag::makeFallback()
2029 : {
2030 1683 : if (!mbIsFallback)
2031 : {
2032 1683 : const lang::Locale& rLocale1 = getLocale( true);
2033 1683 : lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2034 5049 : if ( rLocale1.Language != aLocale2.Language ||
2035 3257 : rLocale1.Country != aLocale2.Country ||
2036 1574 : rLocale1.Variant != aLocale2.Variant)
2037 : {
2038 109 : if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2039 : {
2040 : // "en-US" is the last resort fallback, try if we get a better
2041 : // one for the fallback hierarchy of a non-"en" locale.
2042 0 : ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2043 0 : for (::std::vector< OUString >::const_iterator it( aFallbacks.begin()); it != aFallbacks.end(); ++it)
2044 : {
2045 0 : lang::Locale aLocale3( LanguageTag( *it).getLocale());
2046 0 : aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2047 0 : if (aLocale2.Language != "en" || aLocale2.Country != "US")
2048 0 : break; // for, success
2049 0 : }
2050 : }
2051 : SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2052 : rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2053 : aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2054 109 : reset( aLocale2);
2055 : }
2056 1683 : mbIsFallback = true;
2057 : }
2058 1683 : return *this;
2059 : }
2060 :
2061 :
2062 : /* TODO: maybe this now could take advantage of the mnOverride field in
2063 : * isolang.cxx entries and search for kSAME instead of harcoded special
2064 : * fallbacks. Though iterating through those tables would be slower and even
2065 : * then there would be some special cases, but we wouldn't lack entries that
2066 : * were missed out. */
2067 290340 : ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2068 : {
2069 290340 : ::std::vector< OUString > aVec;
2070 580680 : OUString aLanguage( getLanguage());
2071 580680 : OUString aCountry( getCountry());
2072 290340 : if (isIsoLocale())
2073 : {
2074 250150 : if (!aCountry.isEmpty())
2075 : {
2076 210849 : if (bIncludeFullBcp47)
2077 200153 : aVec.push_back( aLanguage + "-" + aCountry);
2078 210849 : if (aLanguage == "zh")
2079 : {
2080 : // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
2081 : // list zh-CN.
2082 2736 : if (aCountry == "HK" || aCountry == "MO")
2083 912 : aVec.push_back( aLanguage + "-TW");
2084 1824 : else if (aCountry != "CN")
2085 912 : aVec.push_back( aLanguage + "-CN");
2086 2736 : aVec.push_back( aLanguage);
2087 : }
2088 208113 : else if (aLanguage == "sh")
2089 : {
2090 : // Manual list instead of calling
2091 : // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2092 : // that would also include "sh-*" again.
2093 0 : aVec.push_back( "sr-Latn-" + aCountry);
2094 0 : aVec.push_back( "sr-Latn");
2095 0 : aVec.push_back( "sh"); // legacy with script, before default script with country
2096 0 : aVec.push_back( "sr-" + aCountry);
2097 0 : aVec.push_back( "sr");
2098 : }
2099 208113 : else if (aLanguage == "ca" && aCountry == "XV")
2100 : {
2101 0 : ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2102 0 : aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2103 : // Already includes 'ca' language fallback.
2104 : }
2105 208113 : else if (aLanguage == "ku")
2106 : {
2107 0 : if (aCountry == "TR" || aCountry == "SY")
2108 : {
2109 0 : aVec.push_back( "kmr-Latn-" + aCountry);
2110 0 : aVec.push_back( "kmr-" + aCountry);
2111 0 : aVec.push_back( "kmr-Latn");
2112 0 : aVec.push_back( "kmr");
2113 0 : aVec.push_back( aLanguage);
2114 : }
2115 0 : else if (aCountry == "IQ" || aCountry == "IR")
2116 : {
2117 0 : aVec.push_back( "ckb-" + aCountry);
2118 0 : aVec.push_back( "ckb");
2119 : }
2120 : }
2121 208113 : else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2122 : {
2123 0 : aVec.push_back( "ku-Latn-" + aCountry);
2124 0 : aVec.push_back( "ku-" + aCountry);
2125 0 : aVec.push_back( aLanguage);
2126 0 : aVec.push_back( "ku");
2127 : }
2128 208113 : else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2129 : {
2130 2736 : aVec.push_back( "ku-Arab-" + aCountry);
2131 2736 : aVec.push_back( "ku-" + aCountry);
2132 2736 : aVec.push_back( aLanguage);
2133 : // not 'ku' only, that was used for Latin script
2134 : }
2135 : else
2136 205377 : aVec.push_back( aLanguage);
2137 : }
2138 : else
2139 : {
2140 39301 : if (bIncludeFullBcp47)
2141 39301 : aVec.push_back( aLanguage);
2142 39301 : if (aLanguage == "sh")
2143 : {
2144 0 : aVec.push_back( "sr-Latn");
2145 0 : aVec.push_back( "sr");
2146 : }
2147 39301 : else if (aLanguage == "pli")
2148 : {
2149 : // a special case for Pali dictionary, see fdo#41599
2150 0 : aVec.push_back( "pi-Latn");
2151 0 : aVec.push_back( "pi");
2152 : }
2153 : }
2154 250150 : return aVec;
2155 : }
2156 :
2157 40190 : getBcp47(); // have maBcp47 now
2158 40190 : if (bIncludeFullBcp47)
2159 40190 : aVec.push_back( maBcp47);
2160 80380 : OUString aScript;
2161 80380 : OUString aVariants( getVariants());
2162 80380 : OUString aTmp;
2163 40190 : if (hasScript())
2164 : {
2165 37905 : aScript = getScript();
2166 37905 : bool bHaveLanguageScriptVariant = false;
2167 37905 : if (!aCountry.isEmpty())
2168 : {
2169 25573 : if (!aVariants.isEmpty())
2170 : {
2171 0 : aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2172 0 : if (aTmp != maBcp47)
2173 0 : aVec.push_back( aTmp);
2174 : // Language with variant but without country before language
2175 : // without variant but with country.
2176 0 : aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2177 0 : if (aTmp != maBcp47)
2178 0 : aVec.push_back( aTmp);
2179 0 : bHaveLanguageScriptVariant = true;
2180 : }
2181 25573 : aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2182 25573 : if (aTmp != maBcp47)
2183 0 : aVec.push_back( aTmp);
2184 25573 : if (aLanguage == "sr" && aScript == "Latn")
2185 : {
2186 : // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2187 6386 : if (aCountry == "CS")
2188 : {
2189 1374 : aVec.push_back( "sr-Latn-YU");
2190 1374 : aVec.push_back( "sh-CS");
2191 1374 : aVec.push_back( "sh-YU");
2192 : }
2193 : else
2194 5012 : aVec.push_back( "sh-" + aCountry);
2195 : }
2196 19187 : else if (aLanguage == "pi" && aScript == "Latn")
2197 0 : aVec.push_back( "pli"); // a special case for Pali dictionary, see fdo#41599
2198 19187 : else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2199 0 : aVec.push_back( "ku-" + aCountry);
2200 : }
2201 37905 : if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2202 : {
2203 0 : aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2204 0 : if (aTmp != maBcp47)
2205 0 : aVec.push_back( aTmp);
2206 : }
2207 37905 : aTmp = aLanguage + "-" + aScript;
2208 37905 : if (aTmp != maBcp47)
2209 25573 : aVec.push_back( aTmp);
2210 :
2211 : // 'sh' actually denoted a script, so have it here instead of appended
2212 : // at the end as language-only.
2213 37905 : if (aLanguage == "sr" && aScript == "Latn")
2214 7754 : aVec.push_back( "sh");
2215 30151 : else if (aLanguage == "ku" && aScript == "Arab")
2216 0 : aVec.push_back( "ckb");
2217 : // 'ku' only denoted Latin script
2218 30151 : else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2219 0 : aVec.push_back( "ku");
2220 : }
2221 40190 : bool bHaveLanguageVariant = false;
2222 40190 : if (!aCountry.isEmpty())
2223 : {
2224 27856 : if (!aVariants.isEmpty())
2225 : {
2226 2283 : aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2227 2283 : if (aTmp != maBcp47)
2228 0 : aVec.push_back( aTmp);
2229 2283 : if (maBcp47 == "ca-ES-valencia")
2230 1826 : aVec.push_back( "ca-XV");
2231 : // Language with variant but without country before language
2232 : // without variant but with country.
2233 : // But only if variant is not from a grandfathered tag that
2234 : // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2235 : // not.
2236 4566 : if (aVariants.getLength() >= 5 ||
2237 457 : (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2238 : {
2239 1826 : aTmp = aLanguage + "-" + aVariants;
2240 1826 : if (aTmp != maBcp47)
2241 1826 : aVec.push_back( aTmp);
2242 1826 : bHaveLanguageVariant = true;
2243 : }
2244 : }
2245 27856 : aTmp = aLanguage + "-" + aCountry;
2246 27856 : if (aTmp != maBcp47)
2247 27856 : aVec.push_back( aTmp);
2248 : }
2249 40190 : if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2250 : {
2251 : // Only if variant is not from a grandfathered tag that wouldn't match
2252 : // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2253 919 : if (aVariants.getLength() >= 5 ||
2254 459 : (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2255 : {
2256 2 : aTmp = aLanguage + "-" + aVariants;
2257 2 : if (aTmp != maBcp47)
2258 0 : aVec.push_back( aTmp);
2259 : }
2260 : }
2261 :
2262 : // Insert legacy fallbacks with country before language-only, but only
2263 : // default script, script was handled already above.
2264 40190 : if (!aCountry.isEmpty())
2265 : {
2266 27856 : if (aLanguage == "sr" && aCountry == "CS")
2267 1374 : aVec.push_back( "sr-YU");
2268 : }
2269 :
2270 : // Original language-only.
2271 40190 : if (aLanguage != maBcp47)
2272 40190 : aVec.push_back( aLanguage);
2273 :
2274 40190 : return aVec;
2275 : }
2276 :
2277 :
2278 0 : bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
2279 : {
2280 : // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2281 : // can use the operator==() optimization.
2282 0 : if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
2283 0 : return operator==( rLanguageTag);
2284 :
2285 : // Compare full language tag strings.
2286 0 : return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
2287 : }
2288 :
2289 :
2290 1279441 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2291 : {
2292 1279441 : if (isSystemLocale() && rLanguageTag.isSystemLocale())
2293 202940 : return true; // both SYSTEM
2294 :
2295 : // No need to convert to BCP47 if both Lang-IDs are available.
2296 1076501 : if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2297 : {
2298 : // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2299 1042614 : return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2300 : }
2301 :
2302 : // Compare full language tag strings but SYSTEM unresolved.
2303 33887 : return getBcp47( false) == rLanguageTag.getBcp47( false);
2304 : }
2305 :
2306 :
2307 1279441 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2308 : {
2309 1279441 : return !operator==( rLanguageTag);
2310 : }
2311 :
2312 :
2313 99 : bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2314 : {
2315 99 : return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2316 : }
2317 :
2318 :
2319 : // static
2320 465163 : LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
2321 : OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
2322 : {
2323 465163 : Extraction eRet = EXTRACTED_NONE;
2324 465163 : const sal_Int32 nLen = rBcp47.getLength();
2325 465163 : const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2326 465163 : sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2327 465163 : sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2328 465163 : sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2329 465163 : if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2330 : {
2331 : // It's f*d up but we need to recognize this.
2332 6 : eRet = EXTRACTED_X_JOKER;
2333 : }
2334 465157 : else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2335 : {
2336 : // x-... privateuse tags MUST be known to us by definition.
2337 7 : eRet = EXTRACTED_X;
2338 : }
2339 465150 : else if (nLen == 2 || nLen == 3) // ll or lll
2340 : {
2341 69542 : if (nHyph1 < 0)
2342 : {
2343 34771 : rLanguage = rBcp47.toAsciiLowerCase();
2344 34771 : rScript = rCountry = rVariants = OUString();
2345 34771 : eRet = EXTRACTED_LSC;
2346 : }
2347 : }
2348 430379 : else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2349 173733 : || (nHyph1 == 3 && nLen == 6)) // lll-CC
2350 : {
2351 744972 : if (nHyph2 < 0)
2352 : {
2353 372486 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2354 372486 : rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2355 372486 : rScript = rVariants = OUString();
2356 372486 : eRet = EXTRACTED_LSC;
2357 : }
2358 : }
2359 57893 : else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2360 39759 : || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2361 : {
2362 21762 : if (nHyph2 < 0)
2363 : {
2364 21762 : sal_Unicode c = rBcp47[nHyph1+1];
2365 21762 : if ('0' <= c && c <= '9')
2366 : {
2367 : // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2368 1 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2369 1 : rScript = rCountry = OUString();
2370 1 : rVariants = rBcp47.copy( nHyph1 + 1);
2371 1 : eRet = EXTRACTED_LV;
2372 : }
2373 : else
2374 : {
2375 21761 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2376 43522 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2377 65283 : rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2378 21761 : rCountry = rVariants = OUString();
2379 21761 : eRet = EXTRACTED_LSC;
2380 : }
2381 21762 : }
2382 : }
2383 36131 : else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2384 12309 : || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2385 : {
2386 68612 : if (nHyph3 < 0)
2387 : {
2388 34306 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2389 34306 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2390 34306 : rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2391 34306 : rVariants = OUString();
2392 34306 : eRet = EXTRACTED_LSC;
2393 : }
2394 : }
2395 1825 : else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2396 1825 : || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2397 : {
2398 0 : if (nHyph4 < 0)
2399 0 : nHyph4 = rBcp47.getLength();
2400 0 : if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2401 : {
2402 0 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2403 0 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2404 0 : rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2405 0 : rVariants = rBcp47.copy( nHyph3 + 1);
2406 0 : eRet = EXTRACTED_LV;
2407 : }
2408 : }
2409 1825 : else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2410 913 : || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2411 : {
2412 912 : if (nHyph3 < 0)
2413 912 : nHyph3 = rBcp47.getLength();
2414 1824 : if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2415 : {
2416 912 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2417 912 : rScript = OUString();
2418 912 : rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2419 912 : rVariants = rBcp47.copy( nHyph2 + 1);
2420 912 : eRet = EXTRACTED_LV;
2421 : }
2422 : }
2423 913 : else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2424 3 : || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2425 : {
2426 910 : if (nHyph2 < 0)
2427 3 : nHyph2 = rBcp47.getLength();
2428 910 : if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2429 : {
2430 2 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2431 2 : rScript = rCountry = OUString();
2432 2 : rVariants = rBcp47.copy( nHyph1 + 1);
2433 2 : eRet = EXTRACTED_LV;
2434 : }
2435 : else
2436 : {
2437 : // Known and handled grandfathered; ugly but effective ...
2438 : // Note that nLen must have matched above.
2439 : // Strictly not a variant, but so far we treat it as such.
2440 908 : if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2441 : {
2442 907 : rLanguage = "en";
2443 907 : rScript = OUString();
2444 907 : rCountry = "GB";
2445 907 : rVariants = "oed";
2446 907 : eRet = EXTRACTED_LV;
2447 : }
2448 : }
2449 : }
2450 465163 : if (eRet == EXTRACTED_NONE)
2451 : {
2452 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2453 4 : rLanguage = rScript = rCountry = rVariants = OUString();
2454 : }
2455 465163 : return eRet;
2456 : }
2457 :
2458 :
2459 : // static
2460 28290 : ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2461 : const ::std::vector< OUString > & rList, const OUString & rReference )
2462 : {
2463 28290 : if (rList.empty())
2464 2640 : return rList.end();
2465 :
2466 25650 : ::std::vector< OUString >::const_iterator it;
2467 :
2468 : // Try the simple case first without constructing fallbacks.
2469 36338 : for (it = rList.begin(); it != rList.end(); ++it)
2470 : {
2471 25650 : if (*it == rReference)
2472 14962 : return it; // exact match
2473 : }
2474 :
2475 10688 : ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2476 10688 : if (rReference != "en-US")
2477 : {
2478 0 : aFallbacks.push_back( "en-US");
2479 0 : if (rReference != "en")
2480 0 : aFallbacks.push_back( "en");
2481 : }
2482 10688 : if (rReference != "x-default")
2483 10688 : aFallbacks.push_back( "x-default");
2484 10688 : if (rReference != "x-no-translate")
2485 10688 : aFallbacks.push_back( "x-no-translate");
2486 : /* TODO: the original comphelper::Locale::getFallback() code had
2487 : * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2488 : * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2489 : * Did that ever work? Was it supposed to work at all like this? */
2490 :
2491 41730 : for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
2492 : {
2493 62595 : for (it = rList.begin(); it != rList.end(); ++it)
2494 : {
2495 31553 : if (*it == *fb)
2496 511 : return it; // fallback found
2497 : }
2498 : }
2499 :
2500 : // Did not find anything so return something of the list, the first value
2501 : // will do as well as any other as none did match any of the possible
2502 : // fallbacks.
2503 10177 : return rList.begin();
2504 : }
2505 :
2506 :
2507 : // static
2508 0 : ::std::vector< com::sun::star::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2509 : const ::std::vector< com::sun::star::lang::Locale > & rList,
2510 : const com::sun::star::lang::Locale & rReference )
2511 : {
2512 0 : if (rList.empty())
2513 0 : return rList.end();
2514 :
2515 0 : ::std::vector< lang::Locale >::const_iterator it;
2516 :
2517 : // Try the simple case first without constructing fallbacks.
2518 0 : for (it = rList.begin(); it != rList.end(); ++it)
2519 : {
2520 0 : if ( (*it).Language == rReference.Language &&
2521 0 : (*it).Country == rReference.Country &&
2522 0 : (*it).Variant == rReference.Variant)
2523 0 : return it; // exact match
2524 : }
2525 :
2526 : // Now for each reference fallback test the fallbacks of the list in order.
2527 0 : ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2528 0 : ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2529 0 : size_t i = 0;
2530 0 : for (it = rList.begin(); it != rList.end(); ++it, ++i)
2531 : {
2532 0 : ::std::vector< OUString > aTmp( LanguageTag( *it).getFallbackStrings( true));
2533 0 : aListFallbacks[i] = aTmp;
2534 0 : }
2535 0 : for (::std::vector< OUString >::const_iterator rfb( aFallbacks.begin()); rfb != aFallbacks.end(); ++rfb)
2536 : {
2537 0 : for (::std::vector< ::std::vector< OUString > >::const_iterator lfb( aListFallbacks.begin());
2538 0 : lfb != aListFallbacks.end(); ++lfb)
2539 : {
2540 0 : for (::std::vector< OUString >::const_iterator fb( (*lfb).begin()); fb != (*lfb).end(); ++fb)
2541 : {
2542 0 : if (*rfb == *fb)
2543 0 : return rList.begin() + (lfb - aListFallbacks.begin());
2544 : }
2545 : }
2546 : }
2547 :
2548 : // No match found.
2549 0 : return rList.end();
2550 : }
2551 :
2552 :
2553 228 : static bool lcl_isSystem( LanguageType nLangID )
2554 : {
2555 228 : if (nLangID == LANGUAGE_SYSTEM)
2556 195 : return true;
2557 : // There are some special values that simplify to SYSTEM,
2558 : // getRealLanguage() catches and resolves them.
2559 33 : LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2560 33 : if (nNewLangID != nLangID)
2561 0 : return true;
2562 33 : return false;
2563 : }
2564 :
2565 :
2566 : // static
2567 298075 : com::sun::star::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2568 : {
2569 298075 : if (!bResolveSystem && lcl_isSystem( nLangID))
2570 195 : return lang::Locale();
2571 :
2572 297880 : return LanguageTag( nLangID).getLocale( bResolveSystem);
2573 : }
2574 :
2575 :
2576 : // static
2577 522973 : LanguageType LanguageTag::convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
2578 : {
2579 522973 : if (rLocale.Language.isEmpty() && !bResolveSystem)
2580 41166 : return LANGUAGE_SYSTEM;
2581 :
2582 481807 : return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2583 : }
2584 :
2585 :
2586 : // static
2587 775730 : OUString LanguageTagImpl::convertToBcp47( const com::sun::star::lang::Locale& rLocale )
2588 : {
2589 775730 : OUString aBcp47;
2590 775730 : if (rLocale.Language.isEmpty())
2591 : {
2592 : // aBcp47 stays empty
2593 : }
2594 775730 : else if (rLocale.Language == I18NLANGTAG_QLT)
2595 : {
2596 47563 : aBcp47 = rLocale.Variant;
2597 : }
2598 : else
2599 : {
2600 : /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2601 : * now just concatenate language and country. In case we stumbled over
2602 : * variant aware code we'd have to take care of that. */
2603 728167 : if (rLocale.Country.isEmpty())
2604 116745 : aBcp47 = rLocale.Language;
2605 : else
2606 : {
2607 611422 : aBcp47 = rLocale.Language + "-" + rLocale.Country;
2608 : }
2609 : }
2610 775730 : return aBcp47;
2611 : }
2612 :
2613 :
2614 : // static
2615 4092 : OUString LanguageTag::convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
2616 : {
2617 4092 : OUString aBcp47;
2618 4092 : if (rLocale.Language.isEmpty())
2619 : {
2620 5 : if (bResolveSystem)
2621 1 : aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM, true);
2622 : // else aBcp47 stays empty
2623 : }
2624 : else
2625 : {
2626 4087 : aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2627 : }
2628 4092 : return aBcp47;
2629 : }
2630 :
2631 :
2632 : // static
2633 273 : OUString LanguageTag::convertToBcp47( LanguageType nLangID, bool bResolveSystem )
2634 : {
2635 : // Catch this first so we don't need the rest.
2636 273 : if (!bResolveSystem && lcl_isSystem( nLangID))
2637 0 : return OUString();
2638 :
2639 273 : lang::Locale aLocale( LanguageTag::convertToLocale( nLangID, bResolveSystem));
2640 : // If system for some reason (should not happen.. haha) could not be
2641 : // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2642 : // would recurse into this method here!
2643 273 : if (aLocale.Language.isEmpty() && bResolveSystem)
2644 0 : return OUString(); // bad luck, bail out
2645 273 : return LanguageTagImpl::convertToBcp47( aLocale);
2646 : }
2647 :
2648 :
2649 : // static
2650 34138 : com::sun::star::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2651 : {
2652 34138 : if (rBcp47.isEmpty() && !bResolveSystem)
2653 0 : return lang::Locale();
2654 :
2655 34138 : return LanguageTag( rBcp47).getLocale( bResolveSystem);
2656 : }
2657 :
2658 :
2659 : // static
2660 885 : LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47, bool bResolveSystem )
2661 : {
2662 885 : if (rBcp47.isEmpty() && !bResolveSystem)
2663 0 : return LANGUAGE_SYSTEM;
2664 :
2665 885 : return LanguageTag( rBcp47).getLanguageType( bResolveSystem);
2666 : }
2667 :
2668 :
2669 : // static
2670 1241 : LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
2671 : {
2672 1241 : return LanguageTag( rBcp47).makeFallback().getLanguageType( true);
2673 : }
2674 :
2675 :
2676 : // static
2677 0 : com::sun::star::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2678 : {
2679 0 : return LanguageTag( rBcp47).makeFallback().getLocale( true);
2680 : }
2681 :
2682 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|