Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include <config_folders.h>
11 : #include <config_liblangtag.h>
12 :
13 : #include "i18nlangtag/languagetag.hxx"
14 : #include "i18nlangtag/applelangid.hxx"
15 : #include "i18nlangtag/mslangid.hxx"
16 : #include <rtl/ustrbuf.hxx>
17 : #include <rtl/bootstrap.hxx>
18 : #include <osl/file.hxx>
19 : #include <osl/mutex.hxx>
20 : #include <rtl/instance.hxx>
21 : #include <rtl/locale.h>
22 : #include <map>
23 : #include <unordered_set>
24 :
25 : //#define erDEBUG
26 :
27 : #if defined(ENABLE_LIBLANGTAG)
28 : #if LIBLANGTAG_INLINE_FIX
29 : #define LT_HAVE_INLINE
30 : #endif
31 : #include <liblangtag/langtag.h>
32 : #else
33 : /* Replacement code for LGPL phobic and Android systems.
34 : * For iOS we could probably use NSLocale instead, that should have more or
35 : * less required functionality. If it is good enough, it could be used for Mac
36 : * OS X, too.
37 : */
38 : #include "simple-langtag.cxx"
39 : #endif
40 :
41 : using namespace com::sun::star;
42 :
43 :
44 : // Helper to ensure lt_error_t is free'd
45 : struct myLtError
46 : {
47 : lt_error_t* p;
48 120 : myLtError() : p(NULL) {}
49 120 : ~myLtError() { if (p) lt_error_unref( p); }
50 : };
51 :
52 : // "statics" to be returned as const reference to an empty locale and string.
53 : namespace {
54 : struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
55 : struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
56 : }
57 :
58 : typedef std::unordered_set< OUString, OUStringHash > KnownTagSet;
59 : namespace {
60 : struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
61 : struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
62 : }
63 :
64 5227 : static const KnownTagSet & getKnowns()
65 : {
66 5227 : KnownTagSet & rKnowns = theKnowns::get();
67 5227 : if (rKnowns.empty())
68 : {
69 227 : osl::MutexGuard aGuard( theMutex::get());
70 227 : if (rKnowns.empty())
71 : {
72 227 : ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
73 378636 : for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
74 252424 : it != aDefined.end(); ++it)
75 : {
76 : // Do not use the BCP47 string here to initialize the
77 : // LanguageTag because then canonicalize() would call this
78 : // getKnowns() again..
79 125985 : ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings( true));
80 416091 : for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
81 : {
82 290106 : rKnowns.insert( *fb);
83 : }
84 126212 : }
85 227 : }
86 : }
87 5227 : return rKnowns;
88 : }
89 :
90 :
91 : namespace {
92 : struct compareIgnoreAsciiCaseLess
93 : {
94 8562244273 : bool operator()( const OUString& r1, const OUString& r2 ) const
95 : {
96 8562244273 : return r1.compareToIgnoreAsciiCase( r2) < 0;
97 : }
98 : };
99 : typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
100 : typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
101 : struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
102 : struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
103 : struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
104 : struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
105 : }
106 :
107 :
108 19 : static LanguageType getNextOnTheFlyLanguage()
109 : {
110 : static LanguageType nOnTheFlyLanguage = 0;
111 19 : osl::MutexGuard aGuard( theMutex::get());
112 19 : if (!nOnTheFlyLanguage)
113 13 : nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
114 : else
115 : {
116 6 : if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
117 6 : ++nOnTheFlyLanguage;
118 : else
119 : {
120 0 : LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
121 0 : if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
122 0 : nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
123 : else
124 : {
125 : SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
126 : << ((LANGUAGE_ON_THE_FLY_END - LANGUAGE_ON_THE_FLY_START + 1)
127 : * (LANGUAGE_ON_THE_FLY_SUB_END - LANGUAGE_ON_THE_FLY_SUB_START + 1))
128 : << " consumed?!?)");
129 0 : return 0;
130 : }
131 : }
132 : }
133 : #if OSL_DEBUG_LEVEL > 0
134 : static size_t nOnTheFlies = 0;
135 : ++nOnTheFlies;
136 : SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
137 : #endif
138 19 : return nOnTheFlyLanguage;
139 : }
140 :
141 :
142 : // static
143 203577 : bool LanguageTag::isOnTheFlyID( LanguageType nLang )
144 : {
145 203577 : LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
146 203577 : LanguageType nSub = MsLangId::getSubLanguage( nLang);
147 : return
148 862 : LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
149 204439 : LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
150 : }
151 :
152 :
153 : /** A reference holder for liblangtag data de/initialization, one static
154 : instance. Currently implemented such that the first "ref" inits and dtor
155 : (our library deinitialized) tears down.
156 : */
157 : class LiblangtagDataRef
158 : {
159 : public:
160 : LiblangtagDataRef();
161 : ~LiblangtagDataRef();
162 75 : inline void init()
163 : {
164 75 : if (!mbInitialized)
165 56 : setup();
166 75 : }
167 : private:
168 : OString maDataPath; // path to liblangtag data, "|" if system
169 : bool mbInitialized;
170 :
171 : void setupDataPath();
172 : void setup();
173 : static void teardown();
174 : };
175 :
176 : namespace {
177 : struct theDataRef : public rtl::Static< LiblangtagDataRef, theDataRef > {};
178 : }
179 :
180 56 : LiblangtagDataRef::LiblangtagDataRef()
181 : :
182 56 : mbInitialized(false)
183 : {
184 56 : }
185 :
186 112 : LiblangtagDataRef::~LiblangtagDataRef()
187 : {
188 56 : if (mbInitialized)
189 56 : teardown();
190 56 : }
191 :
192 56 : void LiblangtagDataRef::setup()
193 : {
194 : SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
195 56 : if (maDataPath.isEmpty())
196 56 : setupDataPath();
197 56 : lt_db_initialize();
198 56 : mbInitialized = true;
199 56 : }
200 :
201 56 : void LiblangtagDataRef::teardown()
202 : {
203 : SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
204 56 : lt_db_finalize();
205 56 : }
206 :
207 56 : void LiblangtagDataRef::setupDataPath()
208 : {
209 : // maDataPath is assumed to be empty here.
210 56 : OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
211 56 : rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
212 :
213 : // Check if data is in our own installation, else assume system
214 : // installation.
215 112 : OUString aData( aURL);
216 56 : aData += "/language-subtag-registry.xml";
217 112 : osl::DirectoryItem aDirItem;
218 56 : if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
219 : {
220 0 : OUString aPath;
221 0 : if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
222 0 : maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
223 : }
224 56 : if (maDataPath.isEmpty())
225 56 : maDataPath = "|"; // assume system
226 : else
227 56 : lt_db_set_datadir( maDataPath.getStr());
228 56 : }
229 :
230 :
231 : /* TODO: we could transform known vendor and browser-specific variants to known
232 : * BCP 47 if available. For now just remove them to not confuse any later
233 : * treatments that check for empty variants. This vendor stuff was never
234 : * supported anyway. */
235 1390695963 : static void handleVendorVariant( com::sun::star::lang::Locale & rLocale )
236 : {
237 1390695963 : if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
238 7 : rLocale.Variant.clear();
239 1390695963 : }
240 :
241 :
// Implementation data behind LanguageTag. Holds the Locale, BCP 47 string and
// LangID representations of one language tag together with flags telling
// which of them are initialized, cached portions of the tag and the
// liblangtag tag pointer. Everything except construction, destruction and
// assignment is private; LanguageTag is a friend and accesses it directly.
// Instances are referenced through LanguageTag::ImplPtr, e.g. by the
// MapBcp47 and MapLangID maps of this file.
242 : class LanguageTagImpl
243 : {
244 : public:
245 :
// Takes over locale, BCP 47 string, LangID and initialization flags of the
// given LanguageTag.
246 : explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
247 : explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
248 : ~LanguageTagImpl();
249 : LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
250 :
251 : private:
252 :
253 : friend class LanguageTag;
254 :
// Tri-state result for the lazily evaluated meIs... members below.
255 : enum Decision
256 : {
257 : DECISION_DONTKNOW,
258 : DECISION_NO,
259 : DECISION_YES
260 : };
261 :
// All data members except mbSystemLocale are mutable.
262 : mutable com::sun::star::lang::Locale maLocale;
263 : mutable OUString maBcp47;
264 : mutable OUString maCachedLanguage; ///< cache getLanguage()
265 : mutable OUString maCachedScript; ///< cache getScript()
266 : mutable OUString maCachedCountry; ///< cache getCountry()
267 : mutable OUString maCachedVariants; ///< cache getVariants()
268 : mutable lt_tag_t* mpImplLangtag; ///< liblangtag pointer
269 : mutable LanguageType mnLangID;
270 : mutable Decision meIsValid;
271 : mutable Decision meIsIsoLocale;
272 : mutable Decision meIsIsoODF;
273 : mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed
// One-bit flags: which representations are initialized and which of the
// maCached... strings are filled.
274 : bool mbSystemLocale : 1;
275 : mutable bool mbInitializedBcp47 : 1;
276 : mutable bool mbInitializedLocale : 1;
277 : mutable bool mbInitializedLangID : 1;
278 : mutable bool mbCachedLanguage : 1;
279 : mutable bool mbCachedScript : 1;
280 : mutable bool mbCachedCountry : 1;
281 : mutable bool mbCachedVariants : 1;
282 :
283 : const OUString & getBcp47() const;
284 : OUString getLanguage() const;
285 : OUString getScript() const;
286 : OUString getCountry() const;
287 : OUString getRegion() const;
288 : OUString getVariants() const;
289 : bool hasScript() const;
290 :
291 : bool isIsoLocale() const;
292 : bool isIsoODF() const;
293 : bool isValidBcp47() const;
294 :
// Conversions between the three representations (Locale, BCP 47, LangID).
295 : void convertLocaleToBcp47();
296 : void convertLocaleToLang( bool bAllowOnTheFlyID );
297 : void convertBcp47ToLocale();
298 : void convertBcp47ToLang();
299 : void convertLangToLocale();
300 : void convertLangToBcp47();
301 :
302 : /** @return whether BCP 47 language tag string was changed. */
303 : bool canonicalize();
304 :
305 : /** Canonicalize if not yet done and synchronize initialized conversions.
306 :
307 : @return whether BCP 47 language tag string was changed.
308 : */
309 : bool synCanonicalize();
310 :
311 : OUString getLanguageFromLangtag();
312 : OUString getScriptFromLangtag();
313 : OUString getRegionFromLangtag();
314 : OUString getVariantsFromLangtag();
315 :
316 : /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
317 :
318 : @param nRegisterID
319 : If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
320 : instead of generating an on-the-fly ID. Implementation may
321 : still generate an ID if the suggested ID is already used for
322 : another language tag.
323 :
324 : @return NULL if no ID could be obtained or registration failed.
325 : */
326 : LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
327 :
328 : /** Obtain Language, Script, Country and Variants via simpleExtract() and
329 : assign them to the cached variables if successful.
330 :
331 : @return return of simpleExtract()
332 : */
333 : bool cacheSimpleLSCV();
334 :
// Result kinds of simpleExtract().
335 : enum Extraction
336 : {
337 : EXTRACTED_NONE,
338 : EXTRACTED_LSC,
339 : EXTRACTED_LV,
340 : EXTRACTED_X,
341 : EXTRACTED_X_JOKER
342 : };
343 :
344 : /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
345 : portions.
346 :
347 : Does not check case or content!
348 :
349 : @return EXTRACTED_LSC if simple tag was detected (i.e. one that
350 : would fulfill the isIsoODF() condition),
351 : EXTRACTED_LV if a tag with variant was detected,
352 : EXTRACTED_X if x-... privateuse tag was detected,
353 : EXTRACTED_X_JOKER if "*" joker was detected,
354 : EXTRACTED_NONE else.
355 : */
356 : static Extraction simpleExtract( const OUString& rBcp47,
357 : OUString& rLanguage,
358 : OUString& rScript,
359 : OUString& rCountry,
360 : OUString& rVariants );
361 :
362 : /** Convert Locale to BCP 47 string without resolving system and creating
363 : temporary LanguageTag instances. */
364 : static OUString convertToBcp47( const com::sun::star::lang::Locale& rLocale );
365 : };
366 :
367 :
368 330302 : LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
369 : :
370 : maLocale( rLanguageTag.maLocale),
371 : maBcp47( rLanguageTag.maBcp47),
372 : mpImplLangtag( NULL),
373 : mnLangID( rLanguageTag.mnLangID),
374 : meIsValid( DECISION_DONTKNOW),
375 : meIsIsoLocale( DECISION_DONTKNOW),
376 : meIsIsoODF( DECISION_DONTKNOW),
377 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
378 : mbSystemLocale( rLanguageTag.mbSystemLocale),
379 : mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
380 : mbInitializedLocale( rLanguageTag.mbInitializedLocale),
381 : mbInitializedLangID( rLanguageTag.mbInitializedLangID),
382 : mbCachedLanguage( false),
383 : mbCachedScript( false),
384 : mbCachedCountry( false),
385 330302 : mbCachedVariants( false)
386 : {
387 330302 : }
388 :
389 :
390 0 : LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
391 : :
392 : maLocale( rLanguageTagImpl.maLocale),
393 : maBcp47( rLanguageTagImpl.maBcp47),
394 : maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
395 : maCachedScript( rLanguageTagImpl.maCachedScript),
396 : maCachedCountry( rLanguageTagImpl.maCachedCountry),
397 : maCachedVariants( rLanguageTagImpl.maCachedVariants),
398 : mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
399 0 : lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL),
400 : mnLangID( rLanguageTagImpl.mnLangID),
401 : meIsValid( rLanguageTagImpl.meIsValid),
402 : meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
403 : meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
404 : meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
405 : mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
406 : mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
407 : mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
408 : mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
409 : mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
410 : mbCachedScript( rLanguageTagImpl.mbCachedScript),
411 : mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
412 0 : mbCachedVariants( rLanguageTagImpl.mbCachedVariants)
413 : {
414 0 : if (mpImplLangtag)
415 0 : theDataRef::get().init();
416 0 : }
417 :
418 :
419 0 : LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
420 : {
421 0 : if (&rLanguageTagImpl == this)
422 0 : return *this;
423 :
424 0 : maLocale = rLanguageTagImpl.maLocale;
425 0 : maBcp47 = rLanguageTagImpl.maBcp47;
426 0 : maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
427 0 : maCachedScript = rLanguageTagImpl.maCachedScript;
428 0 : maCachedCountry = rLanguageTagImpl.maCachedCountry;
429 0 : maCachedVariants = rLanguageTagImpl.maCachedVariants;
430 0 : lt_tag_t * oldTag = mpImplLangtag;
431 : mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
432 0 : lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL;
433 0 : lt_tag_unref(oldTag);
434 0 : mnLangID = rLanguageTagImpl.mnLangID;
435 0 : meIsValid = rLanguageTagImpl.meIsValid;
436 0 : meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
437 0 : meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
438 0 : meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
439 0 : mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
440 0 : mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
441 0 : mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
442 0 : mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
443 0 : mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
444 0 : mbCachedScript = rLanguageTagImpl.mbCachedScript;
445 0 : mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
446 0 : mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
447 0 : if (mpImplLangtag && !oldTag)
448 0 : theDataRef::get().init();
449 0 : return *this;
450 : }
451 :
452 :
453 660350 : LanguageTagImpl::~LanguageTagImpl()
454 : {
455 330175 : if (mpImplLangtag)
456 : {
457 66 : lt_tag_unref( mpImplLangtag);
458 : }
459 330175 : }
460 :
461 :
462 349736 : LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
463 : :
464 : maBcp47( rBcp47LanguageTag),
465 : mnLangID( LANGUAGE_DONTKNOW),
466 349736 : mbSystemLocale( rBcp47LanguageTag.isEmpty()),
467 349736 : mbInitializedBcp47( !mbSystemLocale),
468 : mbInitializedLocale( false),
469 : mbInitializedLangID( false),
470 1049208 : mbIsFallback( false)
471 : {
472 349736 : if (bCanonicalize)
473 : {
474 8575 : getImpl()->canonicalize();
475 : // Registration itself may already have canonicalized, so do an
476 : // unconditional sync.
477 8575 : syncFromImpl();
478 : }
479 :
480 349736 : }
481 :
482 :
483 1390695923 : LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
484 : :
485 : maLocale( rLocale),
486 : mnLangID( LANGUAGE_DONTKNOW),
487 1390695923 : mbSystemLocale( rLocale.Language.isEmpty()),
488 : mbInitializedBcp47( false),
489 1390695923 : mbInitializedLocale( !mbSystemLocale),
490 : mbInitializedLangID( false),
491 4172087769 : mbIsFallback( false)
492 : {
493 1390695923 : handleVendorVariant( maLocale);
494 1390695923 : }
495 :
496 :
497 1399783858 : LanguageTag::LanguageTag( LanguageType nLanguage )
498 : :
499 : mnLangID( nLanguage),
500 1399783858 : mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
501 : mbInitializedBcp47( false),
502 : mbInitializedLocale( false),
503 1399783858 : mbInitializedLangID( !mbSystemLocale),
504 4199351574 : mbIsFallback( false)
505 : {
506 1399783858 : }
507 :
508 :
509 890 : LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
510 : const OUString& rScript, const OUString& rCountry )
511 : :
512 : maBcp47( rBcp47),
513 : mnLangID( LANGUAGE_DONTKNOW),
514 890 : mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
515 890 : mbInitializedBcp47( !rBcp47.isEmpty()),
516 : mbInitializedLocale( false),
517 : mbInitializedLangID( false),
518 2670 : mbIsFallback( false)
519 : {
520 890 : if (!mbSystemLocale && !mbInitializedBcp47)
521 : {
522 890 : if (rScript.isEmpty())
523 : {
524 890 : maBcp47 = rLanguage + "-" + rCountry;
525 890 : mbInitializedBcp47 = true;
526 890 : maLocale.Language = rLanguage;
527 890 : maLocale.Country = rCountry;
528 890 : mbInitializedLocale = true;
529 : }
530 : else
531 : {
532 0 : if (rCountry.isEmpty())
533 0 : maBcp47 = rLanguage + "-" + rScript;
534 : else
535 0 : maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
536 0 : mbInitializedBcp47 = true;
537 0 : maLocale.Language = I18NLANGTAG_QLT;
538 0 : maLocale.Country = rCountry;
539 0 : maLocale.Variant = maBcp47;
540 0 : mbInitializedLocale = true;
541 : }
542 : }
543 890 : }
544 :
545 :
546 211 : LanguageTag::LanguageTag( const rtl_Locale & rLocale )
547 : :
548 : maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
549 : mnLangID( LANGUAGE_DONTKNOW),
550 211 : mbSystemLocale( maLocale.Language.isEmpty()),
551 : mbInitializedBcp47( false),
552 211 : mbInitializedLocale( !mbSystemLocale),
553 : mbInitializedLangID( false),
554 633 : mbIsFallback( false)
555 : {
556 211 : convertFromRtlLocale();
557 211 : }
558 :
559 :
560 150560201 : LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
561 : :
562 : maLocale( rLanguageTag.maLocale),
563 : maBcp47( rLanguageTag.maBcp47),
564 : mnLangID( rLanguageTag.mnLangID),
565 : mpImpl( rLanguageTag.mpImpl),
566 : mbSystemLocale( rLanguageTag.mbSystemLocale),
567 : mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
568 : mbInitializedLocale( rLanguageTag.mbInitializedLocale),
569 : mbInitializedLangID( rLanguageTag.mbInitializedLangID),
570 150560201 : mbIsFallback(rLanguageTag.mbIsFallback)
571 : {
572 150560201 : }
573 :
574 :
575 262162083 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
576 : {
577 262162083 : if (&rLanguageTag == this)
578 0 : return *this;
579 :
580 262162083 : maLocale = rLanguageTag.maLocale;
581 262162083 : maBcp47 = rLanguageTag.maBcp47;
582 262162083 : mnLangID = rLanguageTag.mnLangID;
583 262162083 : mpImpl = rLanguageTag.mpImpl;
584 262162083 : mbSystemLocale = rLanguageTag.mbSystemLocale;
585 262162083 : mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
586 262162083 : mbInitializedLocale = rLanguageTag.mbInitializedLocale;
587 262162083 : mbInitializedLangID = rLanguageTag.mbInitializedLangID;
588 262162083 : return *this;
589 : }
590 :
591 :
592 2941340711 : LanguageTag::~LanguageTag()
593 : {
594 2941340711 : }
595 :
596 :
// Registers the maBcp47 of this impl together with a LangID in the BCP 47
// and LangID maps of this file.
//
// Steps, as implemented below:
//  1. If no BCP 47 string is initialized yet but a Locale is, derive the
//     string from the Locale. Without a BCP 47 string nothing is registered
//     and an empty ImplPtr is returned.
//  2. With theMutex held, look up maBcp47 in the BCP 47 map. If an entry
//     exists that is a different impl, this impl is overwritten with that
//     impl's state; if no entry exists, a copy of this impl is inserted.
//  3. Unless another impl with an already initialized LangID was found,
//     determine the LangID: nRegisterID if it is usable and not yet mapped
//     to a different BCP 47 string, else the next on-the-fly ID.
//  4. Insert the LangID => impl pair into the LangID map (an already
//     existing entry is kept).
//
// NOTE(review): when getNextOnTheFlyLanguage() runs out of IDs, the impl
// obtained in step 2 is returned, which is not an empty pointer there; the
// declaration's "@return NULL if no ID could be obtained" should be checked
// against that.
597 5028 : LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
598 : {
599 5028 : LanguageTag::ImplPtr pImpl;
600 :
// Step 1: make sure there is a BCP 47 string to register.
601 5028 : if (!mbInitializedBcp47)
602 : {
603 0 : if (mbInitializedLocale)
604 : {
605 0 : maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
606 0 : mbInitializedBcp47 = !maBcp47.isEmpty();
607 : }
608 : }
609 5028 : if (maBcp47.isEmpty())
610 : {
611 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
612 0 : return pImpl;
613 : }
614 :
615 10056 : osl::MutexGuard aGuard( theMutex::get());
616 :
// Step 2: find or create the map entry for maBcp47.
617 5028 : MapBcp47& rMapBcp47 = theMapBcp47::get();
618 5028 : MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
// bOtherImpl: the map already holds a different impl for this string.
619 5028 : bool bOtherImpl = false;
620 5028 : if (it != rMapBcp47.end())
621 : {
622 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
623 5028 : pImpl = (*it).second;
624 5028 : if (pImpl.get() != this)
625 : {
626 : // Could happen for example if during registerImpl() the tag was
627 : // changed via canonicalize() and the result was already present in
628 : // the map before, for example 'bn-Beng' => 'bn'. This specific
629 : // case is now taken care of in registerImpl() and doesn't reach
630 : // here. However, use the already existing impl if it matches.
631 : SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
632 0 : *this = *pImpl; // ensure consistency
633 0 : bOtherImpl = true;
634 : }
635 : }
636 : else
637 : {
638 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
639 0 : pImpl.reset( new LanguageTagImpl( *this));
640 0 : rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
641 : }
642 :
// Step 3: determine the LangID, skipped only if another impl was found
// that already has one.
643 5028 : if (!bOtherImpl || !pImpl->mbInitializedLangID)
644 : {
645 5028 : if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
646 17 : nRegisterID = getNextOnTheFlyLanguage();
647 : else
648 : {
649 : // Accept a suggested ID only if it is not mapped yet to something
650 : // different, otherwise we would end up with ambiguous assignments
651 : // of different language tags, for example for the same primary
652 : // LangID with "no", "nb" and "nn".
653 5011 : const MapLangID& rMapLangID = theMapLangID::get();
654 5011 : MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
655 5011 : if (itID != rMapLangID.end())
656 : {
657 2 : if ((*itID).second->maBcp47 != maBcp47)
658 : {
659 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
660 : << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
661 : << (*itID).second->maBcp47 << "'");
662 2 : nRegisterID = getNextOnTheFlyLanguage();
663 : }
664 : else
665 : {
666 : SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
667 : << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
668 : }
669 : }
670 : }
671 5028 : if (!nRegisterID)
672 : {
673 : // out of IDs, nothing to register
674 0 : return pImpl;
675 : }
676 5028 : pImpl->mnLangID = nRegisterID;
677 5028 : pImpl->mbInitializedLangID = true;
// Keep this impl in sync if the registered impl is a different object.
678 5028 : if (pImpl.get() != this)
679 : {
680 0 : mnLangID = nRegisterID;
681 0 : mbInitializedLangID = true;
682 : }
683 : }
684 :
// Step 4: cross-insert into the LangID map; insert() leaves an existing
// entry for that LangID untouched, which is only logged.
685 : ::std::pair< MapLangID::const_iterator, bool > res(
686 5028 : theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
687 5028 : if (res.second)
688 : {
689 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
690 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
691 : }
692 : else
693 : {
694 : SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
695 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
696 : << (*res.first).second->maBcp47 << "'");
697 : }
698 :
699 5028 : return pImpl;
700 : }
701 :
702 : // static
703 242 : void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
704 : {
705 242 : if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
706 : {
707 : SAL_WARN( "i18nlangtag",
708 : "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
709 : ::std::hex << nLang);
710 0 : return;
711 : }
712 : SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
713 242 : MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
714 : // Resest system locale to none and let registerImpl() do the rest to
715 : // initialize a new one.
716 242 : theSystemLocale::get().reset();
717 242 : LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
718 242 : aLanguageTag.registerImpl();
719 : }
720 :
721 203618 : static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
722 : {
723 413887 : return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
724 610741 : (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
725 : }
726 :
727 :
728 2701127648 : LanguageTag::ImplPtr LanguageTag::registerImpl() const
729 : {
730 : // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
731 : // here as they access getImpl() and syncFromImpl() and would lead to
732 : // recursion. Also do not use the static LanguageTag::convertTo...()
733 : // methods as they may create temporary LanguageTag instances. Only
734 : // LanguageTagImpl::convertToBcp47(Locale) is ok.
735 :
736 2701127648 : ImplPtr pImpl;
737 :
738 : #if OSL_DEBUG_LEVEL > 0
739 : static size_t nCalls = 0;
740 : ++nCalls;
741 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
742 : #endif
743 :
744 : // Do not register unresolved system locale, also force LangID if system
745 : // and take the system locale shortcut if possible.
746 2701127648 : if (mbSystemLocale)
747 : {
748 299288 : pImpl = theSystemLocale::get();
749 299288 : if (pImpl)
750 : {
751 : #if OSL_DEBUG_LEVEL > 0
752 : static size_t nCallsSystem = 0;
753 : ++nCallsSystem;
754 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
755 : #endif
756 299033 : return pImpl;
757 : }
758 255 : if (!mbInitializedLangID)
759 : {
760 255 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
761 255 : mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
762 : SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
763 : }
764 : }
765 :
766 2700828615 : if (mbInitializedLangID)
767 : {
768 1400908906 : if (mnLangID == LANGUAGE_DONTKNOW)
769 : {
770 : // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
771 : // conversion attempts. At the same time provide a central breakpoint
772 : // to inspect such places.
773 1039173 : LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
774 1039173 : if (!rDontKnow)
775 200 : rDontKnow.reset( new LanguageTagImpl( *this));
776 1039173 : pImpl = rDontKnow;
777 : #if OSL_DEBUG_LEVEL > 0
778 : static size_t nCallsDontKnow = 0;
779 : ++nCallsDontKnow;
780 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
781 : #endif
782 1039173 : return pImpl;
783 : }
784 : else
785 : {
786 : // A great share are calls for a system equal locale.
787 1399869733 : pImpl = theSystemLocale::get();
788 1399869733 : if (pImpl && pImpl->mnLangID == mnLangID)
789 : {
790 : #if OSL_DEBUG_LEVEL > 0
791 : static size_t nCallsSystemEqual = 0;
792 : ++nCallsSystemEqual;
793 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
794 : << " system equal LangID calls");
795 : #endif
796 621811624 : return pImpl;
797 : }
798 : }
799 : }
800 :
801 : // Force Bcp47 if not LangID.
802 2077977818 : if (!mbInitializedLangID && !mbInitializedBcp47 && mbInitializedLocale)
803 : {
804 1299581443 : maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
805 1299581443 : mbInitializedBcp47 = !maBcp47.isEmpty();
806 : }
807 :
808 2077977818 : if (mbInitializedBcp47)
809 : {
810 : // A great share are calls for a system equal locale.
811 1299923114 : pImpl = theSystemLocale::get();
812 1299923114 : if (pImpl && pImpl->maBcp47 == maBcp47)
813 : {
814 : #if OSL_DEBUG_LEVEL > 0
815 : static size_t nCallsSystemEqual = 0;
816 : ++nCallsSystemEqual;
817 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
818 : #endif
819 521813622 : return pImpl;
820 : }
821 : }
822 :
823 : #if OSL_DEBUG_LEVEL > 0
824 : static size_t nCallsNonSystem = 0;
825 : ++nCallsNonSystem;
826 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
827 : #endif
828 :
829 3112328392 : osl::MutexGuard aGuard( theMutex::get());
830 :
831 : #if OSL_DEBUG_LEVEL > 0
832 : static long nRunning = 0;
833 : // Entering twice here is ok, which is needed for fallback init in
834 : // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
835 : // everything else is suspicious.
836 : SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
837 : << maBcp47 << "' 0x" << ::std::hex << mnLangID );
838 : struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
839 : #endif
840 :
841 : // Prefer LangID map as find+insert needs less comparison work.
842 1556164196 : if (mbInitializedLangID)
843 : {
844 778058109 : MapLangID& rMap = theMapLangID::get();
845 778058109 : MapLangID::const_iterator it( rMap.find( mnLangID));
846 778058109 : if (it != rMap.end())
847 : {
848 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
849 777947936 : pImpl = (*it).second;
850 : }
851 : else
852 : {
853 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
854 110173 : pImpl.reset( new LanguageTagImpl( *this));
855 110173 : rMap.insert( ::std::make_pair( mnLangID, pImpl));
856 : // Try round-trip.
857 110173 : if (!pImpl->mbInitializedLocale)
858 109946 : pImpl->convertLangToLocale();
859 110173 : LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
860 : // If round-trip is identical cross-insert to Bcp47 map.
861 110173 : if (nLang == pImpl->mnLangID)
862 : {
863 103806 : if (!pImpl->mbInitializedBcp47)
864 103579 : pImpl->convertLocaleToBcp47();
865 : ::std::pair< MapBcp47::const_iterator, bool > res(
866 103806 : theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
867 103806 : if (res.second)
868 : {
869 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
870 : }
871 : else
872 : {
873 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
874 : << ::std::hex << (*res.first).second->mnLangID);
875 : }
876 : }
877 : else
878 : {
879 6367 : if (!pImpl->mbInitializedBcp47)
880 6367 : pImpl->convertLocaleToBcp47();
881 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
882 : }
883 : }
884 : }
885 778106087 : else if (!maBcp47.isEmpty())
886 : {
887 778106087 : MapBcp47& rMap = theMapBcp47::get();
888 778106087 : MapBcp47::const_iterator it( rMap.find( maBcp47));
889 778106087 : if (it != rMap.end())
890 : {
891 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
892 777886158 : pImpl = (*it).second;
893 : }
894 : else
895 : {
896 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
897 219929 : pImpl.reset( new LanguageTagImpl( *this));
898 219929 : ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
899 : // If changed after canonicalize() also add the resulting tag to
900 : // the map.
901 219929 : if (pImpl->synCanonicalize())
902 : {
903 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
904 : ::std::pair< MapBcp47::const_iterator, bool > insCanon(
905 21726 : rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
906 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
907 : << "inserted '" << pImpl->maBcp47 << "'");
908 : // If the canonicalized tag already existed (was not inserted)
909 : // and impls are different, make this impl that impl and skip
910 : // the rest if that LangID is present as well. The existing
911 : // entry may or may not be different, it may even be strictly
912 : // identical to this if it differs only in case (e.g. ko-kr =>
913 : // ko-KR) which was corrected in canonicalize() hence also in
914 : // the map entry but comparison is case insensitive and found
915 : // it again.
916 21726 : if (!insCanon.second && (*insCanon.first).second != pImpl)
917 : {
918 16304 : (*insOrig.first).second = pImpl = (*insCanon.first).second;
919 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
920 : << ::std::hex << pImpl->mnLangID);
921 : }
922 : }
923 219929 : if (!pImpl->mbInitializedLangID)
924 : {
925 : // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
926 203618 : if (!pImpl->mbInitializedLocale)
927 203615 : pImpl->convertBcp47ToLocale();
928 203618 : if (!pImpl->mbInitializedLangID)
929 203618 : pImpl->convertLocaleToLang( true);
930 : // Unconditionally insert (round-trip is possible) for
931 : // on-the-fly IDs and (generated or not) suggested IDs.
932 203618 : bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
933 203618 : OUString aBcp47;
934 203618 : if (!bInsert)
935 : {
936 196920 : if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
937 : {
938 : // May have involved canonicalize(), so compare with
939 : // pImpl->maBcp47 instead of maBcp47!
940 393746 : aBcp47 = LanguageTagImpl::convertToBcp47(
941 393746 : MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
942 196873 : bInsert = (aBcp47 == pImpl->maBcp47);
943 : }
944 : }
945 : // If round-trip is identical cross-insert to LangID map.
946 203618 : if (bInsert)
947 : {
948 : ::std::pair< MapLangID::const_iterator, bool > res(
949 191480 : theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
950 191480 : if (res.second)
951 : {
952 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
953 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
954 : }
955 : else
956 : {
957 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
958 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
959 : << (*res.first).second->maBcp47 << "'");
960 : }
961 : }
962 : else
963 : {
964 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
965 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
966 : << aBcp47 << "'");
967 203618 : }
968 : }
969 : }
970 : }
971 : else
972 : {
973 : SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
974 0 : pImpl.reset( new LanguageTagImpl( *this));
975 : }
976 :
977 : // If we reach here for mbSystemLocale we didn't have theSystemLocale
978 : // above, so add it.
979 1556164196 : if (mbSystemLocale && mbInitializedLangID)
980 : {
981 255 : theSystemLocale::get() = pImpl;
982 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
983 : << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
984 : }
985 :
986 1556164196 : return pImpl;
987 : }
988 :
989 :
990 2703609803 : LanguageTag::ImplPtr LanguageTag::getImpl() const
991 : {
992 2703609803 : if (!mpImpl)
993 : {
994 2701124001 : mpImpl = registerImpl();
995 2701124001 : syncVarsFromRawImpl();
996 : }
997 2703609803 : return mpImpl;
998 : }
999 :
1000 :
1001 4021897 : void LanguageTag::resetVars()
1002 : {
1003 4021897 : mpImpl.reset();
1004 4021897 : maLocale = lang::Locale();
1005 4021897 : maBcp47.clear();
1006 4021897 : mnLangID = LANGUAGE_SYSTEM;
1007 4021897 : mbSystemLocale = true;
1008 4021897 : mbInitializedBcp47 = false;
1009 4021897 : mbInitializedLocale = false;
1010 4021897 : mbInitializedLangID = false;
1011 4021897 : mbIsFallback = false;
1012 4021897 : }
1013 :
1014 :
1015 568 : LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
1016 : {
1017 568 : resetVars();
1018 568 : maBcp47 = rBcp47LanguageTag;
1019 568 : mbSystemLocale = rBcp47LanguageTag.isEmpty();
1020 568 : mbInitializedBcp47 = !mbSystemLocale;
1021 :
1022 568 : if (bCanonicalize)
1023 : {
1024 0 : getImpl()->canonicalize();
1025 : // Registration itself may already have canonicalized, so do an
1026 : // unconditional sync.
1027 0 : syncFromImpl();
1028 : }
1029 568 : return *this;
1030 : }
1031 :
1032 :
1033 40 : LanguageTag & LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
1034 : {
1035 40 : resetVars();
1036 40 : maLocale = rLocale;
1037 40 : mbSystemLocale = rLocale.Language.isEmpty();
1038 40 : mbInitializedLocale = !mbSystemLocale;
1039 40 : handleVendorVariant( maLocale);
1040 40 : return *this;
1041 : }
1042 :
1043 :
1044 4021289 : LanguageTag & LanguageTag::reset( LanguageType nLanguage )
1045 : {
1046 4021289 : resetVars();
1047 4021289 : mnLangID = nLanguage;
1048 4021289 : mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1049 4021289 : mbInitializedLangID = !mbSystemLocale;
1050 4021289 : return *this;
1051 : }
1052 :
1053 :
1054 338819 : bool LanguageTagImpl::canonicalize()
1055 : {
1056 : #ifdef erDEBUG
1057 : // dump once
1058 : struct dumper
1059 : {
1060 : lt_tag_t** mpp;
1061 : dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
1062 : ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
1063 : };
1064 : dumper aDumper( &mpImplLangtag);
1065 : #endif
1066 :
1067 338819 : bool bChanged = false;
1068 :
1069 : // Side effect: have maBcp47 in any case, resolved system.
1070 : // Some methods calling canonicalize() (or not calling it due to
1071 : // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1072 : // meIsLiblangtagNeeded anywhere else than hereafter.
1073 338819 : getBcp47();
1074 :
1075 : // The simple cases and known locales don't need liblangtag processing,
1076 : // which also avoids loading liblangtag data on startup.
1077 338819 : if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
1078 : {
1079 330244 : bool bTemporaryLocale = false;
1080 330244 : bool bTemporaryLangID = false;
1081 330244 : if (!mbInitializedLocale && !mbInitializedLangID)
1082 : {
1083 219919 : if (mbSystemLocale)
1084 : {
1085 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1086 0 : mbInitializedLangID = true;
1087 : }
1088 : else
1089 : {
1090 : // Now this is getting funny.. we only have some BCP47 string
1091 : // and want to determine if parsing it would be possible
1092 : // without using liblangtag just to see if it is a simple known
1093 : // locale or could fall back to one.
1094 439838 : OUString aLanguage, aScript, aCountry, aVariants;
1095 219919 : Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
1096 219919 : if (eExt != EXTRACTED_NONE)
1097 : {
1098 219869 : if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
1099 : {
1100 : // Rebuild bcp47 with proper casing of tags.
1101 219854 : OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
1102 219854 : 1 + aCountry.getLength() + 1 + aVariants.getLength());
1103 219854 : aBuf.append( aLanguage);
1104 219854 : if (!aScript.isEmpty())
1105 28368 : aBuf.append("-" + aScript);
1106 219854 : if (!aCountry.isEmpty())
1107 191472 : aBuf.append("-" + aCountry);
1108 219854 : if (!aVariants.isEmpty())
1109 837 : aBuf.append("-" + aVariants);
1110 439708 : OUString aStr( aBuf.makeStringAndClear());
1111 :
1112 219854 : if (maBcp47 != aStr)
1113 : {
1114 0 : maBcp47 = aStr;
1115 0 : bChanged = true;
1116 219854 : }
1117 : }
1118 219869 : if (eExt == EXTRACTED_LSC && aScript.isEmpty())
1119 : {
1120 190649 : maLocale.Language = aLanguage;
1121 190649 : maLocale.Country = aCountry;
1122 : }
1123 : else
1124 : {
1125 29220 : maLocale.Language = I18NLANGTAG_QLT;
1126 29220 : maLocale.Country = aCountry;
1127 29220 : maLocale.Variant = maBcp47;
1128 : }
1129 219869 : bTemporaryLocale = mbInitializedLocale = true;
1130 219919 : }
1131 : }
1132 : }
1133 330244 : if (mbInitializedLangID && !mbInitializedLocale)
1134 : {
1135 : // Do not call getLocale() here because that prefers
1136 : // convertBcp47ToLocale() which would end up in recursion via
1137 : // isIsoLocale()!
1138 :
1139 : // Prepare to verify that we have a known locale, not just an
1140 : // arbitrary MS-LangID.
1141 0 : convertLangToLocale();
1142 : }
1143 330244 : if (mbInitializedLocale)
1144 : {
1145 330194 : if (!mbInitializedLangID)
1146 : {
1147 219879 : convertLocaleToLang( false);
1148 219879 : if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
1149 219872 : bTemporaryLangID = true;
1150 : }
1151 330194 : if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
1152 324967 : meIsLiblangtagNeeded = DECISION_NO; // known locale
1153 : else
1154 : {
1155 5227 : const KnownTagSet& rKnowns = getKnowns();
1156 5227 : if (rKnowns.find( maBcp47) != rKnowns.end())
1157 5211 : meIsLiblangtagNeeded = DECISION_NO; // known fallback
1158 : }
1159 : // We may have an internal override "canonicalization".
1160 330194 : lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
1161 680502 : if (!aNew.Language.isEmpty() &&
1162 625647 : (aNew.Language != maLocale.Language ||
1163 600518 : aNew.Country != maLocale.Country ||
1164 300050 : aNew.Variant != maLocale.Variant))
1165 : {
1166 25129 : maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
1167 25129 : bChanged = true;
1168 25129 : meIsIsoLocale = DECISION_DONTKNOW;
1169 25129 : meIsIsoODF = DECISION_DONTKNOW;
1170 25129 : meIsLiblangtagNeeded = DECISION_NO; // known locale
1171 330194 : }
1172 : }
1173 330244 : if (bTemporaryLocale)
1174 : {
1175 219869 : mbInitializedLocale = false;
1176 219869 : maLocale = lang::Locale();
1177 : }
1178 330244 : if (bTemporaryLangID)
1179 : {
1180 219872 : mbInitializedLangID = false;
1181 219872 : mnLangID = LANGUAGE_DONTKNOW;
1182 : }
1183 : }
1184 338819 : if (meIsLiblangtagNeeded == DECISION_NO)
1185 : {
1186 338708 : meIsValid = DECISION_YES; // really, known must be valid ...
1187 338708 : return bChanged; // that's it
1188 : }
1189 :
1190 111 : meIsLiblangtagNeeded = DECISION_YES;
1191 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
1192 :
1193 111 : if (!mpImplLangtag)
1194 : {
1195 66 : theDataRef::get().init();
1196 66 : mpImplLangtag = lt_tag_new();
1197 : }
1198 :
1199 111 : myLtError aError;
1200 :
1201 111 : if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
1202 : {
1203 19 : char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
1204 : SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
1205 19 : if (pTag)
1206 : {
1207 19 : OUString aNew( OUString::createFromAscii( pTag));
1208 : // Make the lt_tag_t follow the new string if different, which
1209 : // removes default script and such.
1210 19 : if (maBcp47 != aNew)
1211 : {
1212 2 : maBcp47 = aNew;
1213 2 : bChanged = true;
1214 2 : meIsIsoLocale = DECISION_DONTKNOW;
1215 2 : meIsIsoODF = DECISION_DONTKNOW;
1216 2 : if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
1217 : {
1218 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
1219 0 : free( pTag);
1220 0 : meIsValid = DECISION_NO;
1221 0 : return bChanged;
1222 : }
1223 : }
1224 19 : free( pTag);
1225 19 : meIsValid = DECISION_YES;
1226 19 : return bChanged;
1227 : }
1228 : }
1229 : else
1230 : {
1231 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1232 : }
1233 92 : meIsValid = DECISION_NO;
1234 92 : return bChanged;
1235 : }
1236 :
1237 :
1238 1485332 : bool LanguageTagImpl::synCanonicalize()
1239 : {
1240 1485332 : bool bChanged = false;
1241 1485332 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
1242 : {
1243 330244 : bChanged = canonicalize();
1244 330244 : if (bChanged)
1245 : {
1246 25131 : if (mbInitializedLocale)
1247 3405 : convertBcp47ToLocale();
1248 25131 : if (mbInitializedLangID)
1249 3405 : convertBcp47ToLang();
1250 : }
1251 : }
1252 1485332 : return bChanged;
1253 : }
1254 :
1255 :
1256 751833 : void LanguageTag::syncFromImpl()
1257 : {
1258 751833 : ImplPtr xImpl = getImpl();
1259 751833 : LanguageTagImpl* pImpl = xImpl.get();
1260 755238 : bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1261 1500261 : (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1262 : SAL_INFO_IF( bRegister, "i18nlangtag",
1263 : "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1264 : " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1265 751833 : syncVarsFromRawImpl();
1266 751833 : if (bRegister)
1267 3405 : mpImpl = registerImpl();
1268 751833 : }
1269 :
1270 :
1271 2699210178 : void LanguageTag::syncVarsFromImpl() const
1272 : {
1273 2699210178 : if (!mpImpl)
1274 2699210178 : getImpl(); // with side effect syncVarsFromRawImpl()
1275 : else
1276 0 : syncVarsFromRawImpl();
1277 2699210178 : }
1278 :
1279 :
1280 2701875834 : void LanguageTag::syncVarsFromRawImpl() const
1281 : {
1282 : // Do not use getImpl() here.
1283 2701875834 : LanguageTagImpl* pImpl = mpImpl.get();
1284 2701875834 : if (!pImpl)
1285 2701875834 : return;
1286 :
1287 : // Obviously only mutable variables.
1288 2701875834 : mbInitializedBcp47 = pImpl->mbInitializedBcp47;
1289 2701875834 : maBcp47 = pImpl->maBcp47;
1290 2701875834 : mbInitializedLocale = pImpl->mbInitializedLocale;
1291 2701875834 : maLocale = pImpl->maLocale;
1292 2701875834 : mbInitializedLangID = pImpl->mbInitializedLangID;
1293 2701875834 : mnLangID = pImpl->mnLangID;
1294 : }
1295 :
1296 :
1297 0 : bool LanguageTag::synCanonicalize()
1298 : {
1299 0 : bool bChanged = getImpl()->synCanonicalize();
1300 0 : if (bChanged)
1301 0 : syncFromImpl();
1302 0 : return bChanged;
1303 : }
1304 :
1305 :
1306 110146 : void LanguageTagImpl::convertLocaleToBcp47()
1307 : {
1308 110146 : if (mbSystemLocale && !mbInitializedLocale)
1309 0 : convertLangToLocale();
1310 :
1311 110146 : if (maLocale.Language.isEmpty())
1312 : {
1313 : // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1314 : // locale via LanguageTag::convertToBcp47(LanguageType) and
1315 : // LanguageTag::convertToLocale(LanguageType) would instanciate another
1316 : // LanguageTag.
1317 3 : maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, true);
1318 : }
1319 110146 : if (maLocale.Language.isEmpty())
1320 : {
1321 0 : maBcp47.clear(); // bad luck
1322 : }
1323 110146 : else if (maLocale.Language == I18NLANGTAG_QLT)
1324 : {
1325 16168 : maBcp47 = maLocale.Variant;
1326 16168 : meIsIsoLocale = DECISION_NO;
1327 : }
1328 : else
1329 : {
1330 93978 : maBcp47 = LanguageTag::convertToBcp47( maLocale, true);
1331 : }
1332 110146 : mbInitializedBcp47 = true;
1333 110146 : }
1334 :
1335 :
1336 426902 : void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1337 : {
1338 426902 : if (mbSystemLocale)
1339 : {
1340 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1341 : }
1342 : else
1343 : {
1344 426902 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
1345 426902 : if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1346 : {
1347 5075 : if (isValidBcp47())
1348 : {
1349 : // For language-only (including script) look if we know some
1350 : // locale of that language and if so try to use the primary
1351 : // language ID of that instead of generating an on-the-fly ID.
1352 5028 : if (getCountry().isEmpty() && isIsoODF())
1353 : {
1354 5013 : lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
1355 : // 'en-US' is last resort, do not use except when looking
1356 : // for 'en'.
1357 5013 : if (aLoc.Language != "en" || getLanguage() == "en")
1358 : {
1359 5011 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
1360 5011 : if (mnLangID != LANGUAGE_DONTKNOW)
1361 5011 : mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
1362 5013 : }
1363 : }
1364 5028 : registerOnTheFly( mnLangID);
1365 : }
1366 : else
1367 : {
1368 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1369 : << maBcp47 << "'");
1370 : }
1371 : }
1372 : }
1373 426902 : mbInitializedLangID = true;
1374 426902 : }
1375 :
1376 :
1377 0 : void LanguageTag::convertLocaleToLang()
1378 : {
1379 0 : getImpl()->convertLocaleToLang( true);
1380 0 : syncFromImpl();
1381 0 : }
1382 :
1383 :
1384 207020 : void LanguageTagImpl::convertBcp47ToLocale()
1385 : {
1386 207020 : bool bIso = isIsoLocale();
1387 207020 : if (bIso)
1388 : {
1389 182948 : maLocale.Language = getLanguageFromLangtag();
1390 182948 : maLocale.Country = getRegionFromLangtag();
1391 182948 : maLocale.Variant.clear();
1392 : }
1393 : else
1394 : {
1395 24072 : maLocale.Language = I18NLANGTAG_QLT;
1396 24072 : maLocale.Country = getCountry();
1397 24072 : maLocale.Variant = maBcp47;
1398 : }
1399 207020 : mbInitializedLocale = true;
1400 207020 : }
1401 :
1402 :
1403 0 : void LanguageTag::convertBcp47ToLocale()
1404 : {
1405 0 : getImpl()->convertBcp47ToLocale();
1406 0 : syncFromImpl();
1407 0 : }
1408 :
1409 :
1410 3405 : void LanguageTagImpl::convertBcp47ToLang()
1411 : {
1412 3405 : if (mbSystemLocale)
1413 : {
1414 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1415 : }
1416 : else
1417 : {
1418 3405 : if (!mbInitializedLocale)
1419 0 : convertBcp47ToLocale();
1420 3405 : convertLocaleToLang( true);
1421 : }
1422 3405 : mbInitializedLangID = true;
1423 3405 : }
1424 :
1425 :
1426 0 : void LanguageTag::convertBcp47ToLang()
1427 : {
1428 0 : getImpl()->convertBcp47ToLang();
1429 0 : syncFromImpl();
1430 0 : }
1431 :
1432 :
1433 110146 : void LanguageTagImpl::convertLangToLocale()
1434 : {
1435 110146 : if (mbSystemLocale && !mbInitializedLangID)
1436 : {
1437 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1438 0 : mbInitializedLangID = true;
1439 : }
1440 : // Resolve system here! The original is remembered as mbSystemLocale.
1441 110146 : maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
1442 110146 : mbInitializedLocale = true;
1443 110146 : }
1444 :
1445 :
1446 0 : void LanguageTag::convertLangToLocale()
1447 : {
1448 0 : getImpl()->convertLangToLocale();
1449 0 : syncFromImpl();
1450 0 : }
1451 :
1452 :
1453 200 : void LanguageTagImpl::convertLangToBcp47()
1454 : {
1455 200 : if (!mbInitializedLocale)
1456 200 : convertLangToLocale();
1457 200 : convertLocaleToBcp47();
1458 200 : mbInitializedBcp47 = true;
1459 200 : }
1460 :
1461 :
1462 211 : void LanguageTag::convertFromRtlLocale()
1463 : {
1464 : // The rtl_Locale follows the Open Group Base Specification,
1465 : // 8.2 Internationalization Variables
1466 : // language[_territory][.codeset][@modifier]
1467 : // On GNU/Linux systems usually being glibc locales.
1468 : // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1469 : // Language: language 2 or 3 alpha code
1470 : // Country: [territory] 2 alpha code
1471 : // Variant: [.codeset][@modifier]
1472 : // Variant effectively contains anything that follows the territory, not
1473 : // looking for '.' dot delimiter or '@' modifier content.
1474 211 : if (!maLocale.Variant.isEmpty())
1475 : {
1476 422 : OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
1477 211 : RTL_TEXTENCODING_UTF8);
1478 : /* FIXME: let liblangtag parse this entirely with
1479 : * lt_tag_convert_from_locale() but that needs a patch to pass the
1480 : * string. */
1481 : #if 0
1482 : myLtError aError;
1483 : theDataRef::get().init();
1484 : mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1485 : maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1486 : mbInitializedBcp47 = true;
1487 : #else
1488 211 : mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
1489 211 : if (mnLangID == LANGUAGE_DONTKNOW)
1490 : {
1491 : SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1492 0 : mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1493 : }
1494 211 : mbInitializedLangID = true;
1495 : #endif
1496 211 : maLocale = lang::Locale();
1497 211 : mbInitializedLocale = false;
1498 : }
1499 211 : }
1500 :
1501 :
1502 339005 : const OUString & LanguageTagImpl::getBcp47() const
1503 : {
1504 339005 : if (!mbInitializedBcp47)
1505 : {
1506 200 : if (mbInitializedLocale)
1507 0 : const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
1508 : else
1509 200 : const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
1510 : }
1511 339005 : return maBcp47;
1512 : }
1513 :
1514 :
1515 171049269 : const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1516 : {
1517 171049269 : if (!bResolveSystem && mbSystemLocale)
1518 7827 : return theEmptyBcp47::get();
1519 171041442 : if (!mbInitializedBcp47)
1520 170962487 : syncVarsFromImpl();
1521 171041442 : if (!mbInitializedBcp47)
1522 : {
1523 186 : getImpl()->getBcp47();
1524 186 : const_cast<LanguageTag*>(this)->syncFromImpl();
1525 : }
1526 171041442 : return maBcp47;
1527 : }
1528 :
1529 :
1530 476007 : OUString LanguageTagImpl::getLanguageFromLangtag()
1531 : {
1532 476007 : OUString aLanguage;
1533 476007 : synCanonicalize();
1534 476007 : if (maBcp47.isEmpty())
1535 0 : return aLanguage;
1536 476007 : if (mpImplLangtag)
1537 : {
1538 21 : const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1539 : SAL_WARN_IF( !pLangT, "i18nlangtag",
1540 : "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1541 21 : if (!pLangT)
1542 12 : return aLanguage;
1543 9 : const char* pLang = lt_lang_get_tag( pLangT);
1544 : SAL_WARN_IF( !pLang, "i18nlangtag",
1545 : "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1546 9 : if (pLang)
1547 9 : aLanguage = OUString::createFromAscii( pLang);
1548 : }
1549 : else
1550 : {
1551 475986 : if (mbCachedLanguage || cacheSimpleLSCV())
1552 475984 : aLanguage = maCachedLanguage;
1553 : }
1554 475995 : return aLanguage;
1555 : }
1556 :
1557 :
1558 18 : OUString LanguageTagImpl::getScriptFromLangtag()
1559 : {
1560 18 : OUString aScript;
1561 18 : synCanonicalize();
1562 18 : if (maBcp47.isEmpty())
1563 0 : return aScript;
1564 18 : if (mpImplLangtag)
1565 : {
1566 18 : const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1567 : // pScriptT==NULL is valid for default scripts
1568 18 : if (!pScriptT)
1569 17 : return aScript;
1570 1 : const char* pScript = lt_script_get_tag( pScriptT);
1571 : SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1572 1 : if (pScript)
1573 1 : aScript = OUString::createFromAscii( pScript);
1574 : }
1575 : else
1576 : {
1577 0 : if (mbCachedScript || cacheSimpleLSCV())
1578 0 : aScript = maCachedScript;
1579 : }
1580 1 : return aScript;
1581 : }
1582 :
1583 :
1584 483757 : OUString LanguageTagImpl::getRegionFromLangtag()
1585 : {
1586 483757 : OUString aRegion;
1587 483757 : synCanonicalize();
1588 483757 : if (maBcp47.isEmpty())
1589 0 : return aRegion;
1590 483757 : if (mpImplLangtag)
1591 : {
1592 74 : const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1593 : // pRegionT==NULL is valid for language only tags, rough check here
1594 : // that does not take sophisticated tags into account that actually
1595 : // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1596 : // that ll-CC and lll-CC actually fail.
1597 : SAL_WARN_IF( !pRegionT &&
1598 : maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1599 : maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1600 : "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1601 74 : if (!pRegionT)
1602 71 : return aRegion;
1603 3 : const char* pRegion = lt_region_get_tag( pRegionT);
1604 : SAL_WARN_IF( !pRegion, "i18nlangtag",
1605 : "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1606 3 : if (pRegion)
1607 3 : aRegion = OUString::createFromAscii( pRegion);
1608 : }
1609 : else
1610 : {
1611 483683 : if (mbCachedCountry || cacheSimpleLSCV())
1612 483680 : aRegion = maCachedCountry;
1613 : }
1614 483686 : return aRegion;
1615 : }
1616 :
1617 :
1618 2 : OUString LanguageTagImpl::getVariantsFromLangtag()
1619 : {
1620 2 : OUString aVariants;
1621 2 : synCanonicalize();
1622 2 : if (maBcp47.isEmpty())
1623 0 : return aVariants;
1624 2 : if (mpImplLangtag)
1625 : {
1626 2 : const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1627 3 : for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1628 : {
1629 1 : const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1630 1 : if (pVariantT)
1631 : {
1632 1 : const char* p = lt_variant_get_tag( pVariantT);
1633 1 : if (p)
1634 : {
1635 1 : if (aVariants.isEmpty())
1636 1 : aVariants = OUString::createFromAscii( p);
1637 : else
1638 0 : aVariants += "-" + OUString::createFromAscii( p);
1639 : }
1640 : }
1641 : }
1642 : }
1643 : else
1644 : {
1645 0 : if (mbCachedVariants || cacheSimpleLSCV())
1646 0 : aVariants = maCachedVariants;
1647 : }
1648 2 : return aVariants;
1649 : }
1650 :
1651 :
1652 2392677436 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1653 : {
1654 2392677436 : if (!bResolveSystem && mbSystemLocale)
1655 575 : return theEmptyLocale::get();
1656 2392676861 : if (!mbInitializedLocale)
1657 1314038604 : syncVarsFromImpl();
1658 2392676861 : if (!mbInitializedLocale)
1659 : {
1660 0 : if (mbInitializedBcp47)
1661 0 : const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1662 : else
1663 0 : const_cast<LanguageTag*>(this)->convertLangToLocale();
1664 : }
1665 2392676861 : return maLocale;
1666 : }
1667 :
1668 :
1669 1457853028 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1670 : {
1671 1457853028 : if (!bResolveSystem && mbSystemLocale)
1672 2154989 : return LANGUAGE_SYSTEM;
1673 1455698039 : if (!mbInitializedLangID)
1674 1214209087 : syncVarsFromImpl();
1675 1455698039 : if (!mbInitializedLangID)
1676 : {
1677 0 : if (mbInitializedBcp47)
1678 0 : const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1679 : else
1680 : {
1681 0 : const_cast<LanguageTag*>(this)->convertLocaleToLang();
1682 :
1683 : /* Resolve a locale only unknown due to some redundant information,
1684 : * like 'de-Latn-DE' with script tag. Never call canonicalize()
1685 : * from within convert...() methods due to possible recursion, so
1686 : * do it here. */
1687 0 : if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
1688 0 : const_cast<LanguageTag*>(this)->synCanonicalize();
1689 : }
1690 : }
1691 1455698039 : return mnLangID;
1692 : }
1693 :
1694 :
1695 0 : void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1696 : {
1697 : // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1698 : // and getCountry() to work correctly in this context.
1699 0 : if (isIsoODF())
1700 : {
1701 0 : rLanguage = getLanguage();
1702 0 : rScript = getScript();
1703 0 : rCountry = getCountry();
1704 : }
1705 : else
1706 : {
1707 0 : rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1708 0 : rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1709 0 : rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1710 : }
1711 0 : }
1712 :
1713 :
1714 : namespace
1715 : {
1716 :
1717 640977 : inline bool isLowerAscii( sal_Unicode c )
1718 : {
1719 640977 : return 'a' <= c && c <= 'z';
1720 : }
1721 :
1722 536174 : inline bool isUpperAscii( sal_Unicode c )
1723 : {
1724 536174 : return 'A' <= c && c <= 'Z';
1725 : }
1726 :
1727 : }
1728 :
1729 :
1730 : // static
1731 276530 : bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1732 : {
1733 : /* TODO: ignore case? For now let's see where rubbish is used. */
1734 : bool b2chars;
1735 917539 : if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
1736 1106045 : isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1737 87949 : (b2chars || isLowerAscii( rLanguage[2])))
1738 276505 : return true;
1739 : SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1740 : (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1741 : (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1742 : "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1743 25 : return false;
1744 : }
1745 :
1746 :
1747 : // static
1748 300809 : bool LanguageTag::isIsoCountry( const OUString& rRegion )
1749 : {
1750 : /* TODO: ignore case? For now let's see where rubbish is used. */
1751 869702 : if (rRegion.isEmpty() ||
1752 536171 : (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1753 300806 : return true;
1754 : SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1755 : "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1756 3 : return false;
1757 : }
1758 :
1759 :
1760 : // static
1761 5056 : bool LanguageTag::isIsoScript( const OUString& rScript )
1762 : {
1763 : /* TODO: ignore case? For now let's see where rubbish is used. */
1764 10118 : if (rScript.isEmpty() ||
1765 12 : (rScript.getLength() == 4 &&
1766 18 : isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1767 12 : isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1768 5056 : return true;
1769 : SAL_WARN_IF( rScript.getLength() == 4 &&
1770 : (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1771 : isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1772 : "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1773 0 : return false;
1774 : }
1775 :
1776 :
1777 386620 : OUString LanguageTagImpl::getLanguage() const
1778 : {
1779 386620 : if (!mbCachedLanguage)
1780 : {
1781 293059 : maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
1782 293059 : mbCachedLanguage = true;
1783 : }
1784 386620 : return maCachedLanguage;
1785 : }
1786 :
1787 :
1788 2232952 : OUString LanguageTag::getLanguage() const
1789 : {
1790 2232952 : ImplPtr pImpl = getImpl();
1791 2232952 : if (pImpl->mbCachedLanguage)
1792 2122864 : return pImpl->maCachedLanguage;
1793 220176 : OUString aRet( pImpl->getLanguage());
1794 110088 : const_cast<LanguageTag*>(this)->syncFromImpl();
1795 2343040 : return aRet;
1796 : }
1797 :
1798 :
1799 5056 : OUString LanguageTagImpl::getScript() const
1800 : {
1801 5056 : if (!mbCachedScript)
1802 : {
1803 18 : maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1804 18 : mbCachedScript = true;
1805 : }
1806 5056 : return maCachedScript;
1807 : }
1808 :
1809 :
1810 63485 : OUString LanguageTag::getScript() const
1811 : {
1812 63485 : ImplPtr pImpl = getImpl();
1813 63485 : if (pImpl->mbCachedScript)
1814 63485 : return pImpl->maCachedScript;
1815 0 : OUString aRet( pImpl->getScript());
1816 0 : const_cast<LanguageTag*>(this)->syncFromImpl();
1817 63485 : return aRet;
1818 : }
1819 :
1820 :
1821 7525 : OUString LanguageTag::getLanguageAndScript() const
1822 : {
1823 7525 : OUString aLanguageScript( getLanguage());
1824 15050 : OUString aScript( getScript());
1825 7525 : if (!aScript.isEmpty())
1826 : {
1827 17 : aLanguageScript += "-" + aScript;
1828 : }
1829 15050 : return aLanguageScript;
1830 : }
1831 :
1832 :
1833 29327 : OUString LanguageTagImpl::getCountry() const
1834 : {
1835 29327 : if (!mbCachedCountry)
1836 : {
1837 24304 : maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1838 24304 : if (!LanguageTag::isIsoCountry( maCachedCountry))
1839 1 : maCachedCountry.clear();
1840 24304 : mbCachedCountry = true;
1841 : }
1842 29327 : return maCachedCountry;
1843 : }
1844 :
1845 :
1846 650659 : OUString LanguageTag::getCountry() const
1847 : {
1848 650659 : ImplPtr pImpl = getImpl();
1849 650659 : if (pImpl->mbCachedCountry)
1850 650432 : return pImpl->maCachedCountry;
1851 454 : OUString aRet( pImpl->getCountry());
1852 227 : const_cast<LanguageTag*>(this)->syncFromImpl();
1853 650886 : return aRet;
1854 : }
1855 :
1856 :
1857 276505 : OUString LanguageTagImpl::getRegion() const
1858 : {
1859 276505 : return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1860 : }
1861 :
1862 :
1863 6 : OUString LanguageTagImpl::getVariants() const
1864 : {
1865 6 : if (!mbCachedVariants)
1866 : {
1867 2 : maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
1868 2 : mbCachedVariants = true;
1869 : }
1870 6 : return maCachedVariants;
1871 : }
1872 :
1873 :
1874 59178 : OUString LanguageTag::getVariants() const
1875 : {
1876 59178 : ImplPtr pImpl = getImpl();
1877 59178 : if (pImpl->mbCachedVariants)
1878 59178 : return pImpl->maCachedVariants;
1879 0 : OUString aRet( pImpl->getVariants());
1880 0 : const_cast<LanguageTag*>(this)->syncFromImpl();
1881 59178 : return aRet;
1882 : }
1883 :
1884 :
1885 3 : OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
1886 : {
1887 3 : OUString aRet;
1888 3 : if (isIsoLocale())
1889 : {
1890 3 : OUString aCountry( getCountry());
1891 3 : if (aCountry.isEmpty())
1892 0 : aRet = getLanguage() + rEncoding;
1893 : else
1894 3 : aRet = getLanguage() + "_" + aCountry + rEncoding;
1895 : }
1896 : else
1897 : {
1898 : /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
1899 : * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
1900 : * So far no code was prepared for anything else than a simple
1901 : * language_country locale so we don't lose anything here right now.
1902 : * */
1903 : }
1904 3 : return aRet;
1905 : }
1906 :
1907 :
1908 65129 : bool LanguageTagImpl::hasScript() const
1909 : {
1910 65129 : if (!mbCachedScript)
1911 0 : getScript();
1912 65129 : return !maCachedScript.isEmpty();
1913 : }
1914 :
1915 :
1916 65129 : bool LanguageTag::hasScript() const
1917 : {
1918 65129 : bool bRet = getImpl()->hasScript();
1919 65129 : const_cast<LanguageTag*>(this)->syncFromImpl();
1920 65129 : return bRet;
1921 : }
1922 :
1923 :
1924 313876 : bool LanguageTagImpl::cacheSimpleLSCV()
1925 : {
1926 627752 : OUString aLanguage, aScript, aCountry, aVariants;
1927 313876 : Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
1928 313876 : bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
1929 313876 : if (bRet)
1930 : {
1931 313871 : maCachedLanguage = aLanguage;
1932 313871 : maCachedScript = aScript;
1933 313871 : maCachedCountry = aCountry;
1934 313871 : maCachedVariants = aVariants;
1935 313871 : mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
1936 : }
1937 627752 : return bRet;
1938 : }
1939 :
1940 :
1941 778944 : bool LanguageTagImpl::isIsoLocale() const
1942 : {
1943 778944 : if (meIsIsoLocale == DECISION_DONTKNOW)
1944 : {
1945 300569 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1946 : // It must be at most ll-CC or lll-CC
1947 : // Do not use getCountry() here, use getRegion() instead.
1948 601138 : meIsIsoLocale = ((maBcp47.isEmpty() ||
1949 1707156 : (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
1950 1454689 : LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
1951 : }
1952 778944 : return meIsIsoLocale == DECISION_YES;
1953 : }
1954 :
1955 :
1956 566874 : bool LanguageTag::isIsoLocale() const
1957 : {
1958 566874 : bool bRet = getImpl()->isIsoLocale();
1959 566874 : const_cast<LanguageTag*>(this)->syncFromImpl();
1960 566874 : return bRet;
1961 : }
1962 :
1963 :
1964 5381 : bool LanguageTagImpl::isIsoODF() const
1965 : {
1966 5381 : if (meIsIsoODF == DECISION_DONTKNOW)
1967 : {
1968 5050 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1969 5050 : if (!LanguageTag::isIsoScript( getScript()))
1970 : {
1971 0 : meIsIsoODF = DECISION_NO;
1972 0 : return false;
1973 : }
1974 : // The usual case is lll-CC so simply check that first.
1975 5050 : if (isIsoLocale())
1976 : {
1977 5029 : meIsIsoODF = DECISION_YES;
1978 5029 : return true;
1979 : }
1980 : // If this is not ISO locale for which script must not exist it can
1981 : // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
1982 : // ll-vvvvvvvv
1983 94 : meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
1984 53 : LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
1985 59 : getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
1986 : }
1987 352 : return meIsIsoODF == DECISION_YES;
1988 : }
1989 :
1990 :
1991 353 : bool LanguageTag::isIsoODF() const
1992 : {
1993 353 : bool bRet = getImpl()->isIsoODF();
1994 353 : const_cast<LanguageTag*>(this)->syncFromImpl();
1995 353 : return bRet;
1996 : }
1997 :
1998 :
1999 5476 : bool LanguageTagImpl::isValidBcp47() const
2000 : {
2001 5476 : if (meIsValid == DECISION_DONTKNOW)
2002 : {
2003 0 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2004 : SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2005 : "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2006 : }
2007 5476 : return meIsValid == DECISION_YES;
2008 : }
2009 :
2010 :
2011 401 : bool LanguageTag::isValidBcp47() const
2012 : {
2013 401 : bool bRet = getImpl()->isValidBcp47();
2014 401 : const_cast<LanguageTag*>(this)->syncFromImpl();
2015 401 : return bRet;
2016 : }
2017 :
2018 :
2019 :
2020 :
2021 1926 : LanguageTag & LanguageTag::makeFallback()
2022 : {
2023 1926 : if (!mbIsFallback)
2024 : {
2025 1926 : const lang::Locale& rLocale1 = getLocale( true);
2026 1926 : lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2027 5778 : if ( rLocale1.Language != aLocale2.Language ||
2028 3813 : rLocale1.Country != aLocale2.Country ||
2029 1887 : rLocale1.Variant != aLocale2.Variant)
2030 : {
2031 39 : if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2032 : {
2033 : // "en-US" is the last resort fallback, try if we get a better
2034 : // one for the fallback hierarchy of a non-"en" locale.
2035 0 : ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2036 0 : for (::std::vector< OUString >::const_iterator it( aFallbacks.begin()); it != aFallbacks.end(); ++it)
2037 : {
2038 0 : lang::Locale aLocale3( LanguageTag( *it).getLocale());
2039 0 : aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2040 0 : if (aLocale2.Language != "en" || aLocale2.Country != "US")
2041 0 : break; // for, success
2042 0 : }
2043 : }
2044 : SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2045 : rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2046 : aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2047 39 : reset( aLocale2);
2048 : }
2049 1926 : mbIsFallback = true;
2050 : }
2051 1926 : return *this;
2052 : }
2053 :
2054 :
2055 : /* TODO: maybe this now could take advantage of the mnOverride field in
2056 : * isolang.cxx entries and search for kSAME instead of harcoded special
2057 : * fallbacks. Though iterating through those tables would be slower and even
2058 : * then there would be some special cases, but we wouldn't lack entries that
2059 : * were missed out. */
2060 438357 : ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2061 : {
2062 438357 : ::std::vector< OUString > aVec;
2063 876714 : OUString aLanguage( getLanguage());
2064 876714 : OUString aCountry( getCountry());
2065 438357 : if (isIsoLocale())
2066 : {
2067 379181 : if (!aCountry.isEmpty())
2068 : {
2069 325180 : if (bIncludeFullBcp47)
2070 305818 : aVec.push_back( aLanguage + "-" + aCountry);
2071 325180 : if (aLanguage == "zh")
2072 : {
2073 : // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
2074 : // list zh-CN.
2075 3864 : if (aCountry == "HK" || aCountry == "MO")
2076 1288 : aVec.push_back( aLanguage + "-TW");
2077 2576 : else if (aCountry != "CN")
2078 1288 : aVec.push_back( aLanguage + "-CN");
2079 3864 : aVec.push_back( aLanguage);
2080 : }
2081 321316 : else if (aLanguage == "sh")
2082 : {
2083 : // Manual list instead of calling
2084 : // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2085 : // that would also include "sh-*" again.
2086 0 : aVec.push_back( "sr-Latn-" + aCountry);
2087 0 : aVec.push_back( "sr-Latn");
2088 0 : aVec.push_back( "sh"); // legacy with script, before default script with country
2089 0 : aVec.push_back( "sr-" + aCountry);
2090 0 : aVec.push_back( "sr");
2091 : }
2092 321316 : else if (aLanguage == "ca" && aCountry == "XV")
2093 : {
2094 0 : ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2095 0 : aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2096 : // Already includes 'ca' language fallback.
2097 : }
2098 321316 : else if (aLanguage == "ku")
2099 : {
2100 0 : if (aCountry == "TR" || aCountry == "SY")
2101 : {
2102 0 : aVec.push_back( "kmr-Latn-" + aCountry);
2103 0 : aVec.push_back( "kmr-" + aCountry);
2104 0 : aVec.push_back( "kmr-Latn");
2105 0 : aVec.push_back( "kmr");
2106 0 : aVec.push_back( aLanguage);
2107 : }
2108 0 : else if (aCountry == "IQ" || aCountry == "IR")
2109 : {
2110 0 : aVec.push_back( "ckb-" + aCountry);
2111 0 : aVec.push_back( "ckb");
2112 : }
2113 : }
2114 321316 : else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2115 : {
2116 0 : aVec.push_back( "ku-Latn-" + aCountry);
2117 0 : aVec.push_back( "ku-" + aCountry);
2118 0 : aVec.push_back( aLanguage);
2119 0 : aVec.push_back( "ku");
2120 : }
2121 321316 : else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2122 : {
2123 3864 : aVec.push_back( "ku-Arab-" + aCountry);
2124 3864 : aVec.push_back( "ku-" + aCountry);
2125 3864 : aVec.push_back( aLanguage);
2126 : // not 'ku' only, that was used for Latin script
2127 : }
2128 : else
2129 317452 : aVec.push_back( aLanguage);
2130 : }
2131 : else
2132 : {
2133 54001 : if (bIncludeFullBcp47)
2134 53999 : aVec.push_back( aLanguage);
2135 54001 : if (aLanguage == "sh")
2136 : {
2137 0 : aVec.push_back( "sr-Latn");
2138 0 : aVec.push_back( "sr");
2139 : }
2140 54001 : else if (aLanguage == "pli")
2141 : {
2142 : // a special case for Pali dictionary, see fdo#41599
2143 0 : aVec.push_back( "pi-Latn");
2144 0 : aVec.push_back( "pi");
2145 : }
2146 : }
2147 379181 : return aVec;
2148 : }
2149 :
2150 59176 : getBcp47(); // have maBcp47 now
2151 59176 : if (bIncludeFullBcp47)
2152 59176 : aVec.push_back( maBcp47);
2153 118352 : OUString aScript;
2154 118352 : OUString aVariants( getVariants());
2155 118352 : OUString aTmp;
2156 59176 : if (hasScript())
2157 : {
2158 55951 : aScript = getScript();
2159 55951 : bool bHaveLanguageScriptVariant = false;
2160 55951 : if (!aCountry.isEmpty())
2161 : {
2162 37655 : if (!aVariants.isEmpty())
2163 : {
2164 0 : aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2165 0 : if (aTmp != maBcp47)
2166 0 : aVec.push_back( aTmp);
2167 : // Language with variant but without country before language
2168 : // without variant but with country.
2169 0 : aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2170 0 : if (aTmp != maBcp47)
2171 0 : aVec.push_back( aTmp);
2172 0 : bHaveLanguageScriptVariant = true;
2173 : }
2174 37655 : aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2175 37655 : if (aTmp != maBcp47)
2176 0 : aVec.push_back( aTmp);
2177 37655 : if (aLanguage == "sr" && aScript == "Latn")
2178 : {
2179 : // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2180 9018 : if (aCountry == "CS")
2181 : {
2182 2160 : aVec.push_back( "sr-Latn-YU");
2183 2160 : aVec.push_back( "sh-CS");
2184 2160 : aVec.push_back( "sh-YU");
2185 : }
2186 : else
2187 6858 : aVec.push_back( "sh-" + aCountry);
2188 : }
2189 28637 : else if (aLanguage == "pi" && aScript == "Latn")
2190 0 : aVec.push_back( "pli"); // a special case for Pali dictionary, see fdo#41599
2191 28637 : else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2192 0 : aVec.push_back( "ku-" + aCountry);
2193 : }
2194 55951 : if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2195 : {
2196 0 : aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2197 0 : if (aTmp != maBcp47)
2198 0 : aVec.push_back( aTmp);
2199 : }
2200 55951 : aTmp = aLanguage + "-" + aScript;
2201 55951 : if (aTmp != maBcp47)
2202 37655 : aVec.push_back( aTmp);
2203 :
2204 : // 'sh' actually denoted a script, so have it here instead of appended
2205 : // at the end as language-only.
2206 55951 : if (aLanguage == "sr" && aScript == "Latn")
2207 10950 : aVec.push_back( "sh");
2208 45001 : else if (aLanguage == "ku" && aScript == "Arab")
2209 0 : aVec.push_back( "ckb");
2210 : // 'ku' only denoted Latin script
2211 45001 : else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2212 0 : aVec.push_back( "ku");
2213 : }
2214 59176 : bool bHaveLanguageVariant = false;
2215 59176 : if (!aCountry.isEmpty())
2216 : {
2217 40878 : if (!aVariants.isEmpty())
2218 : {
2219 3223 : aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2220 3223 : if (aTmp != maBcp47)
2221 0 : aVec.push_back( aTmp);
2222 3223 : if (maBcp47 == "ca-ES-valencia")
2223 2578 : aVec.push_back( "ca-XV");
2224 : // Language with variant but without country before language
2225 : // without variant but with country.
2226 : // But only if variant is not from a grandfathered tag that
2227 : // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2228 : // not.
2229 6446 : if (aVariants.getLength() >= 5 ||
2230 645 : (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2231 : {
2232 2578 : aTmp = aLanguage + "-" + aVariants;
2233 2578 : if (aTmp != maBcp47)
2234 2578 : aVec.push_back( aTmp);
2235 2578 : bHaveLanguageVariant = true;
2236 : }
2237 : }
2238 40878 : aTmp = aLanguage + "-" + aCountry;
2239 40878 : if (aTmp != maBcp47)
2240 40878 : aVec.push_back( aTmp);
2241 : }
2242 59176 : if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2243 : {
2244 : // Only if variant is not from a grandfathered tag that wouldn't match
2245 : // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2246 1295 : if (aVariants.getLength() >= 5 ||
2247 647 : (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2248 : {
2249 2 : aTmp = aLanguage + "-" + aVariants;
2250 2 : if (aTmp != maBcp47)
2251 0 : aVec.push_back( aTmp);
2252 : }
2253 : }
2254 :
2255 : // Insert legacy fallbacks with country before language-only, but only
2256 : // default script, script was handled already above.
2257 59176 : if (!aCountry.isEmpty())
2258 : {
2259 40878 : if (aLanguage == "sr" && aCountry == "CS")
2260 2160 : aVec.push_back( "sr-YU");
2261 : }
2262 :
2263 : // Original language-only.
2264 59176 : if (aLanguage != maBcp47)
2265 59176 : aVec.push_back( aLanguage);
2266 :
2267 59176 : return aVec;
2268 : }
2269 :
2270 :
2271 0 : bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
2272 : {
2273 : // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2274 : // can use the operator==() optimization.
2275 0 : if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
2276 0 : return operator==( rLanguageTag);
2277 :
2278 : // Compare full language tag strings.
2279 0 : return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
2280 : }
2281 :
2282 :
2283 87289987 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2284 : {
2285 87289987 : if (isSystemLocale() && rLanguageTag.isSystemLocale())
2286 234237 : return true; // both SYSTEM
2287 :
2288 : // No need to convert to BCP47 if both Lang-IDs are available.
2289 87055750 : if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2290 : {
2291 : // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2292 1671664 : return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2293 : }
2294 :
2295 : // Compare full language tag strings but SYSTEM unresolved.
2296 85384086 : return getBcp47( false) == rLanguageTag.getBcp47( false);
2297 : }
2298 :
2299 :
2300 87279743 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2301 : {
2302 87279743 : return !operator==( rLanguageTag);
2303 : }
2304 :
2305 :
2306 87 : bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2307 : {
2308 87 : return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2309 : }
2310 :
2311 :
2312 : // static
2313 533795 : LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
2314 : OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
2315 : {
2316 533795 : Extraction eRet = EXTRACTED_NONE;
2317 533795 : const sal_Int32 nLen = rBcp47.getLength();
2318 533795 : const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2319 533795 : sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2320 533795 : sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2321 533795 : sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2322 533795 : if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2323 : {
2324 : // It's f*d up but we need to recognize this.
2325 6 : eRet = EXTRACTED_X_JOKER;
2326 : }
2327 533789 : else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2328 : {
2329 : // x-... privateuse tags MUST be known to us by definition.
2330 14 : eRet = EXTRACTED_X;
2331 : }
2332 533775 : else if (nLen == 2 || nLen == 3) // ll or lll
2333 : {
2334 81558 : if (nHyph1 < 0)
2335 : {
2336 40779 : rLanguage = rBcp47.toAsciiLowerCase();
2337 40779 : rScript.clear();
2338 40779 : rCountry.clear();
2339 40779 : rVariants.clear();
2340 40779 : eRet = EXTRACTED_LSC;
2341 : }
2342 : }
2343 492996 : else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2344 201377 : || (nHyph1 == 3 && nLen == 6)) // lll-CC
2345 : {
2346 853136 : if (nHyph2 < 0)
2347 : {
2348 426568 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2349 426568 : rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2350 426568 : rScript.clear();
2351 426568 : rVariants.clear();
2352 426568 : eRet = EXTRACTED_LSC;
2353 : }
2354 : }
2355 66428 : else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2356 45657 : || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2357 : {
2358 25015 : if (nHyph2 < 0)
2359 : {
2360 25015 : sal_Unicode c = rBcp47[nHyph1+1];
2361 25015 : if ('0' <= c && c <= '9')
2362 : {
2363 : // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2364 1 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2365 1 : rScript.clear();
2366 1 : rCountry.clear();
2367 1 : rVariants = rBcp47.copy( nHyph1 + 1);
2368 1 : eRet = EXTRACTED_LV;
2369 : }
2370 : else
2371 : {
2372 25014 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2373 50028 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2374 75042 : rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2375 25014 : rCountry.clear();
2376 25014 : rVariants.clear();
2377 25014 : eRet = EXTRACTED_LSC;
2378 : }
2379 25015 : }
2380 : }
2381 41413 : else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2382 14720 : || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2383 : {
2384 78020 : if (nHyph3 < 0)
2385 : {
2386 39010 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2387 39010 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2388 39010 : rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2389 39010 : rVariants.clear();
2390 39010 : eRet = EXTRACTED_LSC;
2391 : }
2392 : }
2393 2403 : else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2394 2403 : || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2395 : {
2396 0 : if (nHyph4 < 0)
2397 0 : nHyph4 = rBcp47.getLength();
2398 0 : if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2399 : {
2400 0 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2401 0 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2402 0 : rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2403 0 : rVariants = rBcp47.copy( nHyph3 + 1);
2404 0 : eRet = EXTRACTED_LV;
2405 : }
2406 : }
2407 2403 : else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2408 1113 : || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2409 : {
2410 1290 : if (nHyph3 < 0)
2411 1290 : nHyph3 = rBcp47.getLength();
2412 2580 : if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2413 : {
2414 1290 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2415 1290 : rScript.clear();
2416 1290 : rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2417 1290 : rVariants = rBcp47.copy( nHyph2 + 1);
2418 1290 : eRet = EXTRACTED_LV;
2419 : }
2420 : }
2421 1113 : else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2422 47 : || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2423 : {
2424 1066 : if (nHyph2 < 0)
2425 5 : nHyph2 = rBcp47.getLength();
2426 1066 : if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2427 : {
2428 2 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2429 2 : rScript.clear();
2430 2 : rCountry.clear();
2431 2 : rVariants = rBcp47.copy( nHyph1 + 1);
2432 2 : eRet = EXTRACTED_LV;
2433 : }
2434 : else
2435 : {
2436 : // Known and handled grandfathered; ugly but effective ...
2437 : // Note that nLen must have matched above.
2438 : // Strictly not a variant, but so far we treat it as such.
2439 1064 : if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2440 : {
2441 1061 : rLanguage = "en";
2442 1061 : rScript.clear();
2443 1061 : rCountry = "GB";
2444 1061 : rVariants = "oed";
2445 1061 : eRet = EXTRACTED_LV;
2446 : }
2447 : }
2448 : }
2449 533795 : if (eRet == EXTRACTED_NONE)
2450 : {
2451 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2452 50 : rLanguage.clear();
2453 50 : rScript.clear();
2454 50 : rCountry.clear();
2455 50 : rVariants.clear();
2456 : }
2457 533795 : return eRet;
2458 : }
2459 :
2460 :
2461 : // static
2462 50428 : ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2463 : const ::std::vector< OUString > & rList, const OUString & rReference )
2464 : {
2465 50428 : if (rList.empty())
2466 0 : return rList.end();
2467 :
2468 50428 : ::std::vector< OUString >::const_iterator it;
2469 :
2470 : // Try the simple case first without constructing fallbacks.
2471 69772 : for (it = rList.begin(); it != rList.end(); ++it)
2472 : {
2473 50428 : if (*it == rReference)
2474 31084 : return it; // exact match
2475 : }
2476 :
2477 19344 : ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2478 19344 : if (rReference != "en-US")
2479 : {
2480 0 : aFallbacks.push_back( "en-US");
2481 0 : if (rReference != "en")
2482 0 : aFallbacks.push_back( "en");
2483 : }
2484 19344 : if (rReference != "x-default")
2485 19344 : aFallbacks.push_back( "x-default");
2486 19344 : if (rReference != "x-no-translate")
2487 19344 : aFallbacks.push_back( "x-no-translate");
2488 : /* TODO: the original comphelper::Locale::getFallback() code had
2489 : * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2490 : * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2491 : * Did that ever work? Was it supposed to work at all like this? */
2492 :
2493 75738 : for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
2494 : {
2495 113607 : for (it = rList.begin(); it != rList.end(); ++it)
2496 : {
2497 57213 : if (*it == *fb)
2498 819 : return it; // fallback found
2499 : }
2500 : }
2501 :
2502 : // Did not find anything so return something of the list, the first value
2503 : // will do as well as any other as none did match any of the possible
2504 : // fallbacks.
2505 18525 : return rList.begin();
2506 : }
2507 :
2508 :
2509 : // static
2510 0 : ::std::vector< com::sun::star::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2511 : const ::std::vector< com::sun::star::lang::Locale > & rList,
2512 : const com::sun::star::lang::Locale & rReference )
2513 : {
2514 0 : if (rList.empty())
2515 0 : return rList.end();
2516 :
2517 0 : ::std::vector< lang::Locale >::const_iterator it;
2518 :
2519 : // Try the simple case first without constructing fallbacks.
2520 0 : for (it = rList.begin(); it != rList.end(); ++it)
2521 : {
2522 0 : if ( (*it).Language == rReference.Language &&
2523 0 : (*it).Country == rReference.Country &&
2524 0 : (*it).Variant == rReference.Variant)
2525 0 : return it; // exact match
2526 : }
2527 :
2528 : // Now for each reference fallback test the fallbacks of the list in order.
2529 0 : ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2530 0 : ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2531 0 : size_t i = 0;
2532 0 : for (it = rList.begin(); it != rList.end(); ++it, ++i)
2533 : {
2534 0 : ::std::vector< OUString > aTmp( LanguageTag( *it).getFallbackStrings( true));
2535 0 : aListFallbacks[i] = aTmp;
2536 0 : }
2537 0 : for (::std::vector< OUString >::const_iterator rfb( aFallbacks.begin()); rfb != aFallbacks.end(); ++rfb)
2538 : {
2539 0 : for (::std::vector< ::std::vector< OUString > >::const_iterator lfb( aListFallbacks.begin());
2540 0 : lfb != aListFallbacks.end(); ++lfb)
2541 : {
2542 0 : for (::std::vector< OUString >::const_iterator fb( (*lfb).begin()); fb != (*lfb).end(); ++fb)
2543 : {
2544 0 : if (*rfb == *fb)
2545 0 : return rList.begin() + (lfb - aListFallbacks.begin());
2546 : }
2547 : }
2548 : }
2549 :
2550 : // No match found.
2551 0 : return rList.end();
2552 : }
2553 :
2554 :
2555 234 : static bool lcl_isSystem( LanguageType nLangID )
2556 : {
2557 234 : if (nLangID == LANGUAGE_SYSTEM)
2558 199 : return true;
2559 : // There are some special values that simplify to SYSTEM,
2560 : // getRealLanguage() catches and resolves them.
2561 35 : LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2562 35 : if (nNewLangID != nLangID)
2563 0 : return true;
2564 35 : return false;
2565 : }
2566 :
2567 :
2568 : // static
2569 1228346507 : com::sun::star::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2570 : {
2571 1228346507 : if (!bResolveSystem && lcl_isSystem( nLangID))
2572 199 : return lang::Locale();
2573 :
2574 1228346308 : return LanguageTag( nLangID).getLocale( bResolveSystem);
2575 : }
2576 :
2577 :
2578 : // static
2579 1214244301 : LanguageType LanguageTag::convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
2580 : {
2581 1214244301 : if (rLocale.Language.isEmpty() && !bResolveSystem)
2582 64408 : return LANGUAGE_SYSTEM;
2583 :
2584 1214179893 : return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2585 : }
2586 :
2587 :
2588 : // static
2589 1299899351 : OUString LanguageTagImpl::convertToBcp47( const com::sun::star::lang::Locale& rLocale )
2590 : {
2591 1299899351 : OUString aBcp47;
2592 1299899351 : if (rLocale.Language.isEmpty())
2593 : {
2594 : // aBcp47 stays empty
2595 : }
2596 1299899351 : else if (rLocale.Language == I18NLANGTAG_QLT)
2597 : {
2598 44722 : aBcp47 = rLocale.Variant;
2599 : }
2600 : else
2601 : {
2602 : /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2603 : * now just concatenate language and country. In case we stumbled over
2604 : * variant aware code we'd have to take care of that. */
2605 1299854629 : if (rLocale.Country.isEmpty())
2606 388900635 : aBcp47 = rLocale.Language;
2607 : else
2608 : {
2609 910953994 : aBcp47 = rLocale.Language + "-" + rLocale.Country;
2610 : }
2611 : }
2612 1299899351 : return aBcp47;
2613 : }
2614 :
2615 :
2616 : // static
2617 95661 : OUString LanguageTag::convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
2618 : {
2619 95661 : OUString aBcp47;
2620 95661 : if (rLocale.Language.isEmpty())
2621 : {
2622 57 : if (bResolveSystem)
2623 1 : aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM, true);
2624 : // else aBcp47 stays empty
2625 : }
2626 : else
2627 : {
2628 95604 : aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2629 : }
2630 95661 : return aBcp47;
2631 : }
2632 :
2633 :
2634 : // static
2635 302 : OUString LanguageTag::convertToBcp47( LanguageType nLangID, bool bResolveSystem )
2636 : {
2637 : // Catch this first so we don't need the rest.
2638 302 : if (!bResolveSystem && lcl_isSystem( nLangID))
2639 0 : return OUString();
2640 :
2641 302 : lang::Locale aLocale( LanguageTag::convertToLocale( nLangID, bResolveSystem));
2642 : // If system for some reason (should not happen.. haha) could not be
2643 : // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2644 : // would recurse into this method here!
2645 302 : if (aLocale.Language.isEmpty() && bResolveSystem)
2646 0 : return OUString(); // bad luck, bail out
2647 302 : return LanguageTagImpl::convertToBcp47( aLocale);
2648 : }
2649 :
2650 :
2651 : // static
2652 41751 : com::sun::star::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2653 : {
2654 41751 : if (rBcp47.isEmpty() && !bResolveSystem)
2655 0 : return lang::Locale();
2656 :
2657 41751 : return LanguageTag( rBcp47).getLocale( bResolveSystem);
2658 : }
2659 :
2660 :
2661 : // static
2662 1598 : LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47, bool bResolveSystem )
2663 : {
2664 1598 : if (rBcp47.isEmpty() && !bResolveSystem)
2665 0 : return LANGUAGE_SYSTEM;
2666 :
2667 1598 : return LanguageTag( rBcp47).getLanguageType( bResolveSystem);
2668 : }
2669 :
2670 :
2671 : // static
2672 1315 : LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
2673 : {
2674 1315 : return LanguageTag( rBcp47).makeFallback().getLanguageType( true);
2675 : }
2676 :
2677 :
2678 : // static
2679 0 : com::sun::star::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2680 : {
2681 0 : return LanguageTag( rBcp47).makeFallback().getLocale( true);
2682 : }
2683 :
2684 :
2685 : // static
2686 9 : bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate )
2687 : {
2688 9 : bool bValid = false;
2689 :
2690 : struct guard
2691 : {
2692 : lt_tag_t* mpLangtag;
2693 9 : guard()
2694 : {
2695 9 : theDataRef::get().init();
2696 9 : mpLangtag = lt_tag_new();
2697 9 : }
2698 9 : ~guard()
2699 : {
2700 9 : lt_tag_unref( mpLangtag);
2701 9 : }
2702 9 : } aVar;
2703 :
2704 18 : myLtError aError;
2705 :
2706 9 : if (lt_tag_parse( aVar.mpLangtag, OUStringToOString( rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
2707 : {
2708 8 : char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
2709 : SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
2710 8 : if (pTag)
2711 : {
2712 8 : bValid = true;
2713 8 : if (bDisallowPrivate)
2714 : {
2715 3 : const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
2716 3 : if (pPrivate && lt_string_length( pPrivate) > 0)
2717 1 : bValid = false;
2718 : else
2719 : {
2720 2 : const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
2721 2 : if (pLangT)
2722 : {
2723 2 : const char* pLang = lt_lang_get_tag( pLangT);
2724 2 : if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0)
2725 : {
2726 : // Disallow 'qlt' privateuse code to prevent
2727 : // confusion with our internal usage.
2728 0 : bValid = false;
2729 : }
2730 : }
2731 : }
2732 : }
2733 8 : if (o_pCanonicalized)
2734 8 : *o_pCanonicalized = OUString::createFromAscii( pTag);
2735 8 : free( pTag);
2736 8 : return bValid;
2737 : }
2738 : }
2739 : else
2740 : {
2741 : SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
2742 : }
2743 10 : return bValid;
2744 : }
2745 :
2746 10244 : LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
2747 : {
2748 : //map the simple ones via LanguageTypes, and the hard ones explicitly
2749 10244 : LanguageType nLang(LANGUAGE_DONTKNOW);
2750 :
2751 10244 : switch (nLanguage)
2752 : {
2753 : case AppleLanguageId::ENGLISH:
2754 10244 : nLang = LANGUAGE_ENGLISH;
2755 10244 : break;
2756 : case AppleLanguageId::FRENCH:
2757 0 : nLang = LANGUAGE_FRENCH;
2758 0 : break;
2759 : case AppleLanguageId::GERMAN:
2760 0 : nLang = LANGUAGE_GERMAN;
2761 0 : break;
2762 : case AppleLanguageId::ITALIAN:
2763 0 : nLang = LANGUAGE_ITALIAN;
2764 0 : break;
2765 : case AppleLanguageId::DUTCH:
2766 0 : nLang = LANGUAGE_DUTCH;
2767 0 : break;
2768 : case AppleLanguageId::SWEDISH:
2769 0 : nLang = LANGUAGE_SWEDISH;
2770 0 : break;
2771 : case AppleLanguageId::SPANISH:
2772 0 : nLang = LANGUAGE_SPANISH;
2773 0 : break;
2774 : case AppleLanguageId::DANISH:
2775 0 : nLang = LANGUAGE_DANISH;
2776 0 : break;
2777 : case AppleLanguageId::PORTUGUESE:
2778 0 : nLang = LANGUAGE_PORTUGUESE;
2779 0 : break;
2780 : case AppleLanguageId::NORWEGIAN:
2781 0 : nLang = LANGUAGE_NORWEGIAN;
2782 0 : break;
2783 : case AppleLanguageId::HEBREW:
2784 0 : nLang = LANGUAGE_HEBREW;
2785 0 : break;
2786 : case AppleLanguageId::JAPANESE:
2787 0 : nLang = LANGUAGE_JAPANESE;
2788 0 : break;
2789 : case AppleLanguageId::ARABIC:
2790 0 : nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
2791 0 : break;
2792 : case AppleLanguageId::FINNISH:
2793 0 : nLang = LANGUAGE_FINNISH;
2794 0 : break;
2795 : case AppleLanguageId::GREEK:
2796 0 : nLang = LANGUAGE_GREEK;
2797 0 : break;
2798 : case AppleLanguageId::ICELANDIC:
2799 0 : nLang = LANGUAGE_ICELANDIC;
2800 0 : break;
2801 : case AppleLanguageId::MALTESE:
2802 0 : nLang = LANGUAGE_MALTESE;
2803 0 : break;
2804 : case AppleLanguageId::TURKISH:
2805 0 : nLang = LANGUAGE_TURKISH;
2806 0 : break;
2807 : case AppleLanguageId::CROATIAN:
2808 0 : nLang = LANGUAGE_CROATIAN;
2809 0 : break;
2810 : case AppleLanguageId::CHINESE_TRADITIONAL:
2811 0 : nLang = LANGUAGE_CHINESE_TRADITIONAL;
2812 0 : break;
2813 : case AppleLanguageId::URDU:
2814 0 : nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
2815 0 : break;
2816 : case AppleLanguageId::HINDI:
2817 0 : nLang = LANGUAGE_HINDI;
2818 0 : break;
2819 : case AppleLanguageId::THAI:
2820 0 : nLang = LANGUAGE_THAI;
2821 0 : break;
2822 : case AppleLanguageId::KOREAN:
2823 0 : nLang = LANGUAGE_KOREAN;
2824 0 : break;
2825 : case AppleLanguageId::LITHUANIAN:
2826 0 : nLang = LANGUAGE_LITHUANIAN;
2827 0 : break;
2828 : case AppleLanguageId::POLISH:
2829 0 : nLang = LANGUAGE_POLISH;
2830 0 : break;
2831 : case AppleLanguageId::HUNGARIAN:
2832 0 : nLang = LANGUAGE_HUNGARIAN;
2833 0 : break;
2834 : case AppleLanguageId::ESTONIAN:
2835 0 : nLang = LANGUAGE_ESTONIAN;
2836 0 : break;
2837 : case AppleLanguageId::LATVIAN:
2838 0 : nLang = LANGUAGE_LATVIAN;
2839 0 : break;
2840 : case AppleLanguageId::SAMI:
2841 0 : nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
2842 0 : break;
2843 : case AppleLanguageId::FAROESE:
2844 0 : nLang = LANGUAGE_FAEROESE;
2845 0 : break;
2846 : case AppleLanguageId::FARSI:
2847 0 : nLang = LANGUAGE_FARSI;
2848 0 : break;
2849 : case AppleLanguageId::RUSSIAN:
2850 0 : nLang = LANGUAGE_RUSSIAN;
2851 0 : break;
2852 : case AppleLanguageId::CHINESE_SIMPLIFIED:
2853 0 : nLang = LANGUAGE_CHINESE_SIMPLIFIED;
2854 0 : break;
2855 : case AppleLanguageId::FLEMISH:
2856 0 : nLang = LANGUAGE_DUTCH_BELGIAN;
2857 0 : break;
2858 : case AppleLanguageId::IRISH_GAELIC:
2859 0 : nLang = LANGUAGE_GAELIC_IRELAND;
2860 0 : break;
2861 : case AppleLanguageId::ALBANIAN:
2862 0 : nLang = LANGUAGE_ALBANIAN;
2863 0 : break;
2864 : case AppleLanguageId::ROMANIAN:
2865 0 : nLang = LANGUAGE_ROMANIAN;
2866 0 : break;
2867 : case AppleLanguageId::CZECH:
2868 0 : nLang = LANGUAGE_CZECH;
2869 0 : break;
2870 : case AppleLanguageId::SLOVAK:
2871 0 : nLang = LANGUAGE_SLOVAK;
2872 0 : break;
2873 : case AppleLanguageId::SLOVENIAN:
2874 0 : nLang = LANGUAGE_SLOVENIAN;
2875 0 : break;
2876 : case AppleLanguageId::YIDDISH:
2877 0 : nLang = LANGUAGE_YIDDISH;
2878 0 : break;
2879 : case AppleLanguageId::SERBIAN:
2880 0 : nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; //maybe
2881 0 : break;
2882 : case AppleLanguageId::MACEDONIAN:
2883 0 : nLang = LANGUAGE_MACEDONIAN;
2884 0 : break;
2885 : case AppleLanguageId::BULGARIAN:
2886 0 : nLang = LANGUAGE_BULGARIAN;
2887 0 : break;
2888 : case AppleLanguageId::UKRAINIAN:
2889 0 : nLang = LANGUAGE_UKRAINIAN;
2890 0 : break;
2891 : case AppleLanguageId::BYELORUSSIAN:
2892 0 : nLang = LANGUAGE_BELARUSIAN;
2893 0 : break;
2894 : case AppleLanguageId::UZBEK:
2895 0 : nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
2896 0 : break;
2897 : case AppleLanguageId::KAZAKH:
2898 0 : nLang = LANGUAGE_KAZAKH;
2899 0 : break;
2900 : case AppleLanguageId::AZERI_CYRILLIC:
2901 0 : nLang = LANGUAGE_AZERI_CYRILLIC;
2902 0 : break;
2903 : case AppleLanguageId::AZERI_ARABIC:
2904 0 : return LanguageTag("az-Arab");
2905 : break;
2906 : case AppleLanguageId::ARMENIAN:
2907 0 : nLang = LANGUAGE_ARMENIAN;
2908 0 : break;
2909 : case AppleLanguageId::GEORGIAN:
2910 0 : nLang = LANGUAGE_GEORGIAN;
2911 0 : break;
2912 : case AppleLanguageId::MOLDAVIAN:
2913 0 : nLang = LANGUAGE_ROMANIAN_MOLDOVA;
2914 0 : break;
2915 : case AppleLanguageId::KIRGHIZ:
2916 0 : nLang = LANGUAGE_KIRGHIZ;
2917 0 : break;
2918 : case AppleLanguageId::TAJIKI:
2919 0 : nLang = LANGUAGE_TAJIK;
2920 0 : break;
2921 : case AppleLanguageId::TURKMEN:
2922 0 : nLang = LANGUAGE_TURKMEN;
2923 0 : break;
2924 : case AppleLanguageId::MONGOLIAN_MONGOLIAN:
2925 0 : nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
2926 0 : break;
2927 : case AppleLanguageId::MONGOLIAN_CYRILLIC:
2928 0 : nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
2929 0 : break;
2930 : case AppleLanguageId::PASHTO:
2931 0 : nLang = LANGUAGE_PASHTO;
2932 0 : break;
2933 : case AppleLanguageId::KURDISH:
2934 0 : nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
2935 0 : break;
2936 : case AppleLanguageId::KASHMIRI:
2937 0 : nLang = LANGUAGE_KASHMIRI;
2938 0 : break;
2939 : case AppleLanguageId::SINDHI:
2940 0 : nLang = LANGUAGE_SINDHI;
2941 0 : break;
2942 : case AppleLanguageId::TIBETAN:
2943 0 : nLang = LANGUAGE_TIBETAN;
2944 0 : break;
2945 : case AppleLanguageId::NEPALI:
2946 0 : nLang = LANGUAGE_NEPALI;
2947 0 : break;
2948 : case AppleLanguageId::SANSKRIT:
2949 0 : nLang = LANGUAGE_SANSKRIT;
2950 0 : break;
2951 : case AppleLanguageId::MARATHI:
2952 0 : nLang = LANGUAGE_MARATHI;
2953 0 : break;
2954 : case AppleLanguageId::BENGALI:
2955 0 : nLang = LANGUAGE_BENGALI;
2956 0 : break;
2957 : case AppleLanguageId::ASSAMESE:
2958 0 : nLang = LANGUAGE_ASSAMESE;
2959 0 : break;
2960 : case AppleLanguageId::GUJARATI:
2961 0 : nLang = LANGUAGE_GUJARATI;
2962 0 : break;
2963 : case AppleLanguageId::PUNJABI:
2964 0 : nLang = LANGUAGE_PUNJABI;
2965 0 : break;
2966 : case AppleLanguageId::ORIYA:
2967 0 : nLang = LANGUAGE_ODIA;
2968 0 : break;
2969 : case AppleLanguageId::MALAYALAM:
2970 0 : nLang = LANGUAGE_MALAYALAM;
2971 0 : break;
2972 : case AppleLanguageId::KANNADA:
2973 0 : nLang = LANGUAGE_KANNADA;
2974 0 : break;
2975 : case AppleLanguageId::TAMIL:
2976 0 : nLang = LANGUAGE_TAMIL;
2977 0 : break;
2978 : case AppleLanguageId::TELUGU:
2979 0 : nLang = LANGUAGE_TELUGU;
2980 0 : break;
2981 : case AppleLanguageId::SINHALESE:
2982 0 : nLang = LANGUAGE_SINHALESE_SRI_LANKA;
2983 0 : break;
2984 : case AppleLanguageId::BURMESE:
2985 0 : nLang = LANGUAGE_BURMESE;
2986 0 : break;
2987 : case AppleLanguageId::KHMER:
2988 0 : nLang = LANGUAGE_KHMER;
2989 0 : break;
2990 : case AppleLanguageId::LAO:
2991 0 : nLang = LANGUAGE_LAO;
2992 0 : break;
2993 : case AppleLanguageId::VIETNAMESE:
2994 0 : nLang = LANGUAGE_VIETNAMESE;
2995 0 : break;
2996 : case AppleLanguageId::INDONESIAN:
2997 0 : nLang = LANGUAGE_INDONESIAN;
2998 0 : break;
2999 : case AppleLanguageId::TAGALONG:
3000 0 : nLang = LANGUAGE_USER_TAGALOG;
3001 0 : break;
3002 : case AppleLanguageId::MALAY_LATIN:
3003 0 : nLang = LANGUAGE_MALAY_MALAYSIA;
3004 0 : break;
3005 : case AppleLanguageId::MALAY_ARABIC:
3006 0 : return LanguageTag("ms-Arab");
3007 : break;
3008 : case AppleLanguageId::AMHARIC:
3009 0 : nLang = LANGUAGE_AMHARIC_ETHIOPIA;
3010 0 : break;
3011 : case AppleLanguageId::TIGRINYA:
3012 0 : nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
3013 0 : break;
3014 : case AppleLanguageId::GALLA:
3015 0 : nLang = LANGUAGE_OROMO;
3016 0 : break;
3017 : case AppleLanguageId::SOMALI:
3018 0 : nLang = LANGUAGE_SOMALI;
3019 0 : break;
3020 : case AppleLanguageId::SWAHILI:
3021 0 : nLang = LANGUAGE_SWAHILI;
3022 0 : break;
3023 : case AppleLanguageId::KINYARWANDA:
3024 0 : nLang = LANGUAGE_KINYARWANDA_RWANDA;
3025 0 : break;
3026 : case AppleLanguageId::RUNDI:
3027 0 : return LanguageTag("rn");
3028 : break;
3029 : case AppleLanguageId::NYANJA:
3030 0 : nLang = LANGUAGE_USER_NYANJA;
3031 0 : break;
3032 : case AppleLanguageId::MALAGASY:
3033 0 : nLang = LANGUAGE_MALAGASY_PLATEAU;
3034 0 : break;
3035 : case AppleLanguageId::ESPERANTO:
3036 0 : nLang = LANGUAGE_USER_ESPERANTO;
3037 0 : break;
3038 : case AppleLanguageId::WELSH:
3039 0 : nLang = LANGUAGE_WELSH;
3040 0 : break;
3041 : case AppleLanguageId::BASQUE:
3042 0 : nLang = LANGUAGE_BASQUE;
3043 0 : break;
3044 : case AppleLanguageId::CATALAN:
3045 0 : nLang = LANGUAGE_CATALAN;
3046 0 : break;
3047 : case AppleLanguageId::LATIN:
3048 0 : nLang = LANGUAGE_USER_LATIN;
3049 0 : break;
3050 : case AppleLanguageId::QUENCHUA:
3051 0 : nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
3052 0 : break;
3053 : case AppleLanguageId::GUARANI:
3054 0 : nLang = LANGUAGE_GUARANI_PARAGUAY;
3055 0 : break;
3056 : case AppleLanguageId::AYMARA:
3057 0 : return LanguageTag("ay");
3058 : break;
3059 : case AppleLanguageId::TATAR:
3060 0 : nLang = LANGUAGE_TATAR;
3061 0 : break;
3062 : case AppleLanguageId::UIGHUR:
3063 0 : nLang = LANGUAGE_UIGHUR_CHINA;
3064 0 : break;
3065 : case AppleLanguageId::DZONGKHA:
3066 0 : nLang = LANGUAGE_DZONGKHA;
3067 0 : break;
3068 : case AppleLanguageId::JAVANESE_LATIN:
3069 0 : return LanguageTag("jv-Latn");
3070 : break;
3071 : case AppleLanguageId::SUNDANESE_LATIN:
3072 0 : return LanguageTag("su-Latn");
3073 : break;
3074 : case AppleLanguageId::GALICIAN:
3075 0 : nLang = LANGUAGE_GALICIAN;
3076 0 : break;
3077 : case AppleLanguageId::AFRIKAANS:
3078 0 : nLang = LANGUAGE_AFRIKAANS;
3079 0 : break;
3080 : case AppleLanguageId::BRETON:
3081 0 : nLang = LANGUAGE_BRETON_FRANCE;
3082 0 : break;
3083 : case AppleLanguageId::INUKTITUT:
3084 0 : nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
3085 0 : break;
3086 : case AppleLanguageId::SCOTTISH_GAELIC:
3087 0 : nLang = LANGUAGE_GAELIC_SCOTLAND;
3088 0 : break;
3089 : case AppleLanguageId::MANX_GAELIC:
3090 0 : nLang = LANGUAGE_USER_MANX;
3091 0 : break;
3092 : case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE:
3093 0 : return LanguageTag("ga-Latg");
3094 : break;
3095 : case AppleLanguageId::TONGAN:
3096 0 : return LanguageTag("to");
3097 : break;
3098 : case AppleLanguageId::GREEK_POLYTONIC:
3099 0 : nLang = LANGUAGE_USER_ANCIENT_GREEK;
3100 0 : break;
3101 : case AppleLanguageId::GREENLANDIC:
3102 0 : nLang = LANGUAGE_KALAALLISUT_GREENLAND;
3103 0 : break;
3104 : case AppleLanguageId::AZERI_LATIN:
3105 0 : nLang = LANGUAGE_AZERI_LATIN;
3106 0 : break;
3107 : }
3108 :
3109 10244 : return LanguageTag(nLang);
3110 : }
3111 :
3112 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|