Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #include <config_folders.h>
11 :
12 : #include "i18nlangtag/languagetag.hxx"
13 : #include "i18nlangtag/applelangid.hxx"
14 : #include "i18nlangtag/mslangid.hxx"
15 : #include <rtl/ustrbuf.hxx>
16 : #include <rtl/bootstrap.hxx>
17 : #include <osl/file.hxx>
18 : #include <rtl/instance.hxx>
19 : #include <rtl/locale.h>
20 : #include <boost/unordered_set.hpp>
21 : #include <map>
22 :
23 : //#define erDEBUG
24 :
25 : #if defined(ENABLE_LIBLANGTAG)
26 : #include <liblangtag/langtag.h>
27 : #else
28 : /* Replacement code for LGPL phobic and Android systems.
29 : * For iOS we could probably use NSLocale instead, that should have more or
30 : * less required functionality. If it is good enough, it could be used for Mac
31 : * OS X, too.
32 : */
33 : #include "simple-langtag.cxx"
34 : #endif
35 :
36 : using namespace com::sun::star;
37 :
38 :
39 : // Helper to ensure lt_error_t is free'd
40 : struct myLtError
41 : {
42 : lt_error_t* p;
43 62 : myLtError() : p(NULL) {}
44 62 : ~myLtError() { if (p) lt_error_unref( p); }
45 : };
46 :
47 : // "statics" to be returned as const reference to an empty locale and string.
48 : namespace {
49 : struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
50 : struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
51 : }
52 :
53 : typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet;
54 : namespace {
55 : struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
56 : struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
57 : }
58 :
59 28 : static const KnownTagSet & getKnowns()
60 : {
61 28 : KnownTagSet & rKnowns = theKnowns::get();
62 28 : if (rKnowns.empty())
63 : {
64 22 : osl::MutexGuard aGuard( theMutex::get());
65 22 : if (rKnowns.empty())
66 : {
67 22 : ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
68 36564 : for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
69 24376 : it != aDefined.end(); ++it)
70 : {
71 : // Do not use the BCP47 string here to initialize the
72 : // LanguageTag because then canonicalize() would call this
73 : // getKnowns() again..
74 12166 : ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings( true));
75 40194 : for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
76 : {
77 28028 : rKnowns.insert( *fb);
78 : }
79 12188 : }
80 22 : }
81 : }
82 28 : return rKnowns;
83 : }
84 :
85 :
86 : namespace {
87 : struct compareIgnoreAsciiCaseLess
88 : {
89 8479942 : bool operator()( const OUString& r1, const OUString& r2 ) const
90 : {
91 8479942 : return r1.compareToIgnoreAsciiCase( r2) < 0;
92 : }
93 : };
94 : typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
95 : typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
96 : struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
97 : struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
98 : struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
99 : struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
100 : }
101 :
102 :
103 254 : static LanguageType getNextOnTheFlyLanguage()
104 : {
105 : static LanguageType nOnTheFlyLanguage = 0;
106 254 : osl::MutexGuard aGuard( theMutex::get());
107 254 : if (!nOnTheFlyLanguage)
108 66 : nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
109 : else
110 : {
111 188 : if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
112 188 : ++nOnTheFlyLanguage;
113 : else
114 : {
115 0 : LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
116 0 : if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
117 0 : nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
118 : else
119 : {
120 : SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
121 : << ((LANGUAGE_ON_THE_FLY_END - LANGUAGE_ON_THE_FLY_START + 1)
122 : * (LANGUAGE_ON_THE_FLY_SUB_END - LANGUAGE_ON_THE_FLY_SUB_START + 1))
123 : << " consumed?!?)");
124 0 : return 0;
125 : }
126 : }
127 : }
128 : #if OSL_DEBUG_LEVEL > 0
129 : static size_t nOnTheFlies = 0;
130 : ++nOnTheFlies;
131 : SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
132 : #endif
133 254 : return nOnTheFlyLanguage;
134 : }
135 :
136 :
137 : // static
138 224675 : bool LanguageTag::isOnTheFlyID( LanguageType nLang )
139 : {
140 224675 : LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
141 224675 : LanguageType nSub = MsLangId::getSubLanguage( nLang);
142 : return
143 1110 : LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
144 225785 : LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
145 : }
146 :
147 :
148 : /** A reference holder for liblangtag data de/initialization, one static
149 : instance. Currently implemented such that the first "ref" inits and dtor
150 : (our library deinitialized) tears down.
151 : */
152 : class LiblantagDataRef
153 : {
154 : public:
155 : LiblantagDataRef();
156 : ~LiblantagDataRef();
157 58 : inline void incRef()
158 : {
159 58 : if (mnRef != SAL_MAX_UINT32 && !mnRef++)
160 22 : setup();
161 58 : }
162 80 : inline void decRef()
163 : {
164 80 : if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
165 22 : teardown();
166 80 : }
167 : private:
168 : OString maDataPath; // path to liblangtag data, "|" if system
169 : sal_uInt32 mnRef;
170 :
171 : void setupDataPath();
172 : void setup();
173 : void teardown();
174 : };
175 :
176 : namespace {
177 : struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
178 : }
179 :
180 22 : LiblantagDataRef::LiblantagDataRef()
181 : :
182 22 : mnRef(0)
183 : {
184 22 : }
185 :
186 44 : LiblantagDataRef::~LiblantagDataRef()
187 : {
188 : // When destructed we're tearing down unconditionally.
189 22 : if (mnRef)
190 22 : mnRef = 1;
191 22 : decRef();
192 22 : }
193 :
194 22 : void LiblantagDataRef::setup()
195 : {
196 : SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
197 22 : if (maDataPath.isEmpty())
198 22 : setupDataPath();
199 22 : lt_db_initialize();
200 : // Hold ref eternally.
201 22 : mnRef = SAL_MAX_UINT32;
202 22 : }
203 :
204 22 : void LiblantagDataRef::teardown()
205 : {
206 : SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
207 22 : lt_db_finalize();
208 22 : }
209 :
210 22 : void LiblantagDataRef::setupDataPath()
211 : {
212 : // maDataPath is assumed to be empty here.
213 22 : OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
214 22 : rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
215 :
216 : // Check if data is in our own installation, else assume system
217 : // installation.
218 44 : OUString aData( aURL);
219 22 : aData += "/language-subtag-registry.xml";
220 44 : osl::DirectoryItem aDirItem;
221 22 : if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
222 : {
223 0 : OUString aPath;
224 0 : if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
225 0 : maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
226 : }
227 22 : if (maDataPath.isEmpty())
228 22 : maDataPath = "|"; // assume system
229 : else
230 22 : lt_db_set_datadir( maDataPath.getStr());
231 22 : }
232 :
233 :
234 : /* TODO: we could transform known vendor and browser-specific variants to known
235 : * BCP 47 if available. For now just remove them to not confuse any later
236 : * treatments that check for empty variants. This vendor stuff was never
237 : * supported anyway. */
238 734994 : static void handleVendorVariant( com::sun::star::lang::Locale & rLocale )
239 : {
240 734994 : if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
241 14 : rLocale.Variant = OUString();
242 734994 : }
243 :
244 :
245 : class LanguageTagImpl
246 : {
247 : public:
248 :
249 : explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
250 : explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
251 : ~LanguageTagImpl();
252 : LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
253 :
254 : private:
255 :
256 : friend class LanguageTag;
257 :
258 : enum Decision
259 : {
260 : DECISION_DONTKNOW,
261 : DECISION_NO,
262 : DECISION_YES
263 : };
264 :
265 : mutable com::sun::star::lang::Locale maLocale;
266 : mutable OUString maBcp47;
267 : mutable OUString maCachedLanguage; ///< cache getLanguage()
268 : mutable OUString maCachedScript; ///< cache getScript()
269 : mutable OUString maCachedCountry; ///< cache getCountry()
270 : mutable OUString maCachedVariants; ///< cache getVariants()
271 : mutable lt_tag_t* mpImplLangtag; ///< liblangtag pointer
272 : mutable LanguageType mnLangID;
273 : mutable Decision meIsValid;
274 : mutable Decision meIsIsoLocale;
275 : mutable Decision meIsIsoODF;
276 : mutable Decision meIsLiblangtagNeeded; ///< whether processing with liblangtag needed
277 : bool mbSystemLocale : 1;
278 : mutable bool mbInitializedBcp47 : 1;
279 : mutable bool mbInitializedLocale : 1;
280 : mutable bool mbInitializedLangID : 1;
281 : mutable bool mbCachedLanguage : 1;
282 : mutable bool mbCachedScript : 1;
283 : mutable bool mbCachedCountry : 1;
284 : mutable bool mbCachedVariants : 1;
285 :
286 : const OUString & getBcp47() const;
287 : OUString getLanguage() const;
288 : OUString getScript() const;
289 : OUString getCountry() const;
290 : OUString getRegion() const;
291 : OUString getVariants() const;
292 : bool hasScript() const;
293 :
294 : bool isIsoLocale() const;
295 : bool isIsoODF() const;
296 : bool isValidBcp47() const;
297 :
298 : void convertLocaleToBcp47();
299 : void convertLocaleToLang( bool bAllowOnTheFlyID );
300 : void convertBcp47ToLocale();
301 : void convertBcp47ToLang();
302 : void convertLangToLocale();
303 : void convertLangToBcp47();
304 :
305 : /** @return whether BCP 47 language tag string was changed. */
306 : bool canonicalize();
307 :
308 : /** Canonicalize if not yet done and synchronize initialized conversions.
309 :
310 : @return whether BCP 47 language tag string was changed.
311 : */
312 : bool synCanonicalize();
313 :
314 : OUString getLanguageFromLangtag();
315 : OUString getScriptFromLangtag();
316 : OUString getRegionFromLangtag();
317 : OUString getVariantsFromLangtag();
318 :
319 : /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
320 :
321 : @param nRegisterID
322 : If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
323 : instead of generating an on-the-fly ID. Implementation may
324 : still generate an ID if the suggested ID is already used for
325 : another language tag.
326 :
327 : @return NULL if no ID could be obtained or registration failed.
328 : */
329 : LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
330 :
331 : /** Obtain Language, Script, Country and Variants via simpleExtract() and
332 : assign them to the cached variables if successful.
333 :
334 : @return return of simpleExtract()
335 : */
336 : bool cacheSimpleLSCV();
337 :
338 : enum Extraction
339 : {
340 : EXTRACTED_NONE,
341 : EXTRACTED_LSC,
342 : EXTRACTED_LV,
343 : EXTRACTED_X,
344 : EXTRACTED_X_JOKER
345 : };
346 :
347 : /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
348 : portions.
349 :
350 : Does not check case or content!
351 :
352 : @return EXTRACTED_LSC if simple tag was detected (i.e. one that
353 : would fulfill the isIsoODF() condition),
354 : EXTRACTED_LV if a tag with variant was detected,
355 : EXTRACTED_X if x-... privateuse tag was detected,
356 : EXTRACTED_X_JOKER if "*" joker was detected,
357 : EXTRACTED_NONE else.
358 : */
359 : static Extraction simpleExtract( const OUString& rBcp47,
360 : OUString& rLanguage,
361 : OUString& rScript,
362 : OUString& rCountry,
363 : OUString& rVariants );
364 :
365 : /** Convert Locale to BCP 47 string without resolving system and creating
366 : temporary LanguageTag instances. */
367 : static OUString convertToBcp47( const com::sun::star::lang::Locale& rLocale );
368 : };
369 :
370 :
371 251874 : LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
372 : :
373 : maLocale( rLanguageTag.maLocale),
374 : maBcp47( rLanguageTag.maBcp47),
375 : mpImplLangtag( NULL),
376 : mnLangID( rLanguageTag.mnLangID),
377 : meIsValid( DECISION_DONTKNOW),
378 : meIsIsoLocale( DECISION_DONTKNOW),
379 : meIsIsoODF( DECISION_DONTKNOW),
380 : meIsLiblangtagNeeded( DECISION_DONTKNOW),
381 : mbSystemLocale( rLanguageTag.mbSystemLocale),
382 : mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
383 : mbInitializedLocale( rLanguageTag.mbInitializedLocale),
384 : mbInitializedLangID( rLanguageTag.mbInitializedLangID),
385 : mbCachedLanguage( false),
386 : mbCachedScript( false),
387 : mbCachedCountry( false),
388 251874 : mbCachedVariants( false)
389 : {
390 251874 : }
391 :
392 :
393 0 : LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
394 : :
395 : maLocale( rLanguageTagImpl.maLocale),
396 : maBcp47( rLanguageTagImpl.maBcp47),
397 : maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
398 : maCachedScript( rLanguageTagImpl.maCachedScript),
399 : maCachedCountry( rLanguageTagImpl.maCachedCountry),
400 : maCachedVariants( rLanguageTagImpl.maCachedVariants),
401 : mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
402 0 : lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL),
403 : mnLangID( rLanguageTagImpl.mnLangID),
404 : meIsValid( rLanguageTagImpl.meIsValid),
405 : meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
406 : meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
407 : meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
408 : mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
409 : mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
410 : mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
411 : mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
412 : mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
413 : mbCachedScript( rLanguageTagImpl.mbCachedScript),
414 : mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
415 0 : mbCachedVariants( rLanguageTagImpl.mbCachedVariants)
416 : {
417 0 : if (mpImplLangtag)
418 0 : theDataRef::get().incRef();
419 0 : }
420 :
421 :
422 0 : LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
423 : {
424 0 : if (&rLanguageTagImpl == this)
425 0 : return *this;
426 :
427 0 : maLocale = rLanguageTagImpl.maLocale;
428 0 : maBcp47 = rLanguageTagImpl.maBcp47;
429 0 : maCachedLanguage = rLanguageTagImpl.maCachedLanguage;
430 0 : maCachedScript = rLanguageTagImpl.maCachedScript;
431 0 : maCachedCountry = rLanguageTagImpl.maCachedCountry;
432 0 : maCachedVariants = rLanguageTagImpl.maCachedVariants;
433 0 : lt_tag_t * oldTag = mpImplLangtag;
434 : mpImplLangtag = rLanguageTagImpl.mpImplLangtag ?
435 0 : lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL;
436 0 : lt_tag_unref(oldTag);
437 0 : mnLangID = rLanguageTagImpl.mnLangID;
438 0 : meIsValid = rLanguageTagImpl.meIsValid;
439 0 : meIsIsoLocale = rLanguageTagImpl.meIsIsoLocale;
440 0 : meIsIsoODF = rLanguageTagImpl.meIsIsoODF;
441 0 : meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
442 0 : mbSystemLocale = rLanguageTagImpl.mbSystemLocale;
443 0 : mbInitializedBcp47 = rLanguageTagImpl.mbInitializedBcp47;
444 0 : mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
445 0 : mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
446 0 : mbCachedLanguage = rLanguageTagImpl.mbCachedLanguage;
447 0 : mbCachedScript = rLanguageTagImpl.mbCachedScript;
448 0 : mbCachedCountry = rLanguageTagImpl.mbCachedCountry;
449 0 : mbCachedVariants = rLanguageTagImpl.mbCachedVariants;
450 0 : if (mpImplLangtag && !oldTag)
451 0 : theDataRef::get().incRef();
452 0 : else if (!mpImplLangtag && oldTag)
453 0 : theDataRef::get().decRef();
454 0 : return *this;
455 : }
456 :
457 :
458 503434 : LanguageTagImpl::~LanguageTagImpl()
459 : {
460 251717 : if (mpImplLangtag)
461 : {
462 40 : lt_tag_unref( mpImplLangtag);
463 40 : theDataRef::get().decRef();
464 : }
465 251717 : }
466 :
467 :
468 416924 : LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
469 : :
470 : maBcp47( rBcp47LanguageTag),
471 : mnLangID( LANGUAGE_DONTKNOW),
472 416924 : mbSystemLocale( rBcp47LanguageTag.isEmpty()),
473 416924 : mbInitializedBcp47( !mbSystemLocale),
474 : mbInitializedLocale( false),
475 : mbInitializedLangID( false),
476 1250772 : mbIsFallback( false)
477 : {
478 416924 : if (bCanonicalize)
479 : {
480 15603 : getImpl()->canonicalize();
481 : // Registration itself may already have canonicalized, so do an
482 : // unconditional sync.
483 15603 : syncFromImpl();
484 : }
485 :
486 416924 : }
487 :
488 :
489 734328 : LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
490 : :
491 : maLocale( rLocale),
492 : mnLangID( LANGUAGE_DONTKNOW),
493 734328 : mbSystemLocale( rLocale.Language.isEmpty()),
494 : mbInitializedBcp47( false),
495 734328 : mbInitializedLocale( !mbSystemLocale),
496 : mbInitializedLangID( false),
497 2202984 : mbIsFallback( false)
498 : {
499 734328 : handleVendorVariant( maLocale);
500 734328 : }
501 :
502 :
503 1030781 : LanguageTag::LanguageTag( LanguageType nLanguage )
504 : :
505 : mnLangID( nLanguage),
506 1030781 : mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
507 : mbInitializedBcp47( false),
508 : mbInitializedLocale( false),
509 1030781 : mbInitializedLangID( !mbSystemLocale),
510 3092343 : mbIsFallback( false)
511 : {
512 1030781 : }
513 :
514 :
515 1522 : LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
516 : const OUString& rScript, const OUString& rCountry )
517 : :
518 : maBcp47( rBcp47),
519 : mnLangID( LANGUAGE_DONTKNOW),
520 1522 : mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
521 1522 : mbInitializedBcp47( !rBcp47.isEmpty()),
522 : mbInitializedLocale( false),
523 : mbInitializedLangID( false),
524 4566 : mbIsFallback( false)
525 : {
526 1522 : if (!mbSystemLocale && !mbInitializedBcp47)
527 : {
528 1522 : if (rScript.isEmpty())
529 : {
530 1522 : maBcp47 = rLanguage + "-" + rCountry;
531 1522 : mbInitializedBcp47 = true;
532 1522 : maLocale.Language = rLanguage;
533 1522 : maLocale.Country = rCountry;
534 1522 : mbInitializedLocale = true;
535 : }
536 : else
537 : {
538 0 : if (rCountry.isEmpty())
539 0 : maBcp47 = rLanguage + "-" + rScript;
540 : else
541 0 : maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
542 0 : mbInitializedBcp47 = true;
543 0 : maLocale.Language = I18NLANGTAG_QLT;
544 0 : maLocale.Country = rCountry;
545 0 : maLocale.Variant = maBcp47;
546 0 : mbInitializedLocale = true;
547 : }
548 : }
549 1522 : }
550 :
551 :
552 306 : LanguageTag::LanguageTag( const rtl_Locale & rLocale )
553 : :
554 : maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
555 : mnLangID( LANGUAGE_DONTKNOW),
556 306 : mbSystemLocale( maLocale.Language.isEmpty()),
557 : mbInitializedBcp47( false),
558 306 : mbInitializedLocale( !mbSystemLocale),
559 : mbInitializedLangID( false),
560 918 : mbIsFallback( false)
561 : {
562 306 : convertFromRtlLocale();
563 306 : }
564 :
565 :
566 20538230 : LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
567 : :
568 : maLocale( rLanguageTag.maLocale),
569 : maBcp47( rLanguageTag.maBcp47),
570 : mnLangID( rLanguageTag.mnLangID),
571 : mpImpl( rLanguageTag.mpImpl),
572 : mbSystemLocale( rLanguageTag.mbSystemLocale),
573 : mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
574 : mbInitializedLocale( rLanguageTag.mbInitializedLocale),
575 : mbInitializedLangID( rLanguageTag.mbInitializedLangID),
576 20538230 : mbIsFallback(rLanguageTag.mbIsFallback)
577 : {
578 20538230 : }
579 :
580 :
581 184993 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
582 : {
583 184993 : if (&rLanguageTag == this)
584 0 : return *this;
585 :
586 184993 : maLocale = rLanguageTag.maLocale;
587 184993 : maBcp47 = rLanguageTag.maBcp47;
588 184993 : mnLangID = rLanguageTag.mnLangID;
589 184993 : mpImpl = rLanguageTag.mpImpl;
590 184993 : mbSystemLocale = rLanguageTag.mbSystemLocale;
591 184993 : mbInitializedBcp47 = rLanguageTag.mbInitializedBcp47;
592 184993 : mbInitializedLocale = rLanguageTag.mbInitializedLocale;
593 184993 : mbInitializedLangID = rLanguageTag.mbInitializedLangID;
594 184993 : return *this;
595 : }
596 :
597 :
598 22712425 : LanguageTag::~LanguageTag()
599 : {
600 22712425 : }
601 :
602 :
603 7716 : LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
604 : {
605 7716 : LanguageTag::ImplPtr pImpl;
606 :
607 7716 : if (!mbInitializedBcp47)
608 : {
609 0 : if (mbInitializedLocale)
610 : {
611 0 : maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
612 0 : mbInitializedBcp47 = !maBcp47.isEmpty();
613 : }
614 : }
615 7716 : if (maBcp47.isEmpty())
616 : {
617 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
618 0 : return pImpl;
619 : }
620 :
621 15432 : osl::MutexGuard aGuard( theMutex::get());
622 :
623 7716 : MapBcp47& rMapBcp47 = theMapBcp47::get();
624 7716 : MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
625 7716 : bool bOtherImpl = false;
626 7716 : if (it != rMapBcp47.end())
627 : {
628 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
629 7716 : pImpl = (*it).second;
630 7716 : if (pImpl.get() != this)
631 : {
632 : // Could happen for example if during registerImpl() the tag was
633 : // changed via canonicalize() and the result was already present in
634 : // the map before, for example 'bn-Beng' => 'bn'. This specific
635 : // case is now taken care of in registerImpl() and doesn't reach
636 : // here. However, use the already existing impl if it matches.
637 : SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
638 0 : *this = *pImpl; // ensure consistency
639 0 : bOtherImpl = true;
640 : }
641 : }
642 : else
643 : {
644 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
645 0 : pImpl.reset( new LanguageTagImpl( *this));
646 0 : rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
647 : }
648 :
649 7716 : if (!bOtherImpl || !pImpl->mbInitializedLangID)
650 : {
651 7716 : if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
652 252 : nRegisterID = getNextOnTheFlyLanguage();
653 : else
654 : {
655 : // Accept a suggested ID only if it is not mapped yet to something
656 : // different, otherwise we would end up with ambiguous assignments
657 : // of different language tags, for example for the same primary
658 : // LangID with "no", "nb" and "nn".
659 7464 : const MapLangID& rMapLangID = theMapLangID::get();
660 7464 : MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
661 7464 : if (itID != rMapLangID.end())
662 : {
663 2 : if ((*itID).second->maBcp47 != maBcp47)
664 : {
665 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
666 : << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
667 : << (*itID).second->maBcp47 << "'");
668 2 : nRegisterID = getNextOnTheFlyLanguage();
669 : }
670 : else
671 : {
672 : SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
673 : << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
674 : }
675 : }
676 : }
677 7716 : if (!nRegisterID)
678 : {
679 : // out of IDs, nothing to register
680 0 : return pImpl;
681 : }
682 7716 : pImpl->mnLangID = nRegisterID;
683 7716 : pImpl->mbInitializedLangID = true;
684 7716 : if (pImpl.get() != this)
685 : {
686 0 : mnLangID = nRegisterID;
687 0 : mbInitializedLangID = true;
688 : }
689 : }
690 :
691 : ::std::pair< MapLangID::const_iterator, bool > res(
692 7716 : theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
693 7716 : if (res.second)
694 : {
695 : SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
696 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
697 : }
698 : else
699 : {
700 : SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
701 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
702 : << (*res.first).second->maBcp47 << "'");
703 : }
704 :
705 7716 : return pImpl;
706 : }
707 :
708 : // static
709 472 : void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
710 : {
711 472 : if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
712 : {
713 : SAL_WARN( "i18nlangtag",
714 : "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
715 : ::std::hex << nLang);
716 0 : return;
717 : }
718 : SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
719 472 : MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
720 : // Resest system locale to none and let registerImpl() do the rest to
721 : // initialize a new one.
722 472 : theSystemLocale::get().reset();
723 472 : LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
724 472 : aLanguageTag.registerImpl();
725 : }
726 :
727 224671 : static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
728 : {
729 459478 : return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
730 673743 : (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
731 : }
732 :
733 :
734 4221127 : LanguageTag::ImplPtr LanguageTag::registerImpl() const
735 : {
736 : // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
737 : // here as they access getImpl() and syncFromImpl() and would lead to
738 : // recursion. Also do not use the static LanguageTag::convertTo...()
739 : // methods as they may create temporary LanguageTag instances. Only
740 : // LanguageTagImpl::convertToBcp47(Locale) is ok.
741 :
742 4221127 : ImplPtr pImpl;
743 :
744 : #if OSL_DEBUG_LEVEL > 0
745 : static size_t nCalls = 0;
746 : ++nCalls;
747 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
748 : #endif
749 :
750 : // Do not register unresolved system locale, also force LangID if system
751 : // and take the system locale shortcut if possible.
752 4221127 : if (mbSystemLocale)
753 : {
754 167056 : pImpl = theSystemLocale::get();
755 167056 : if (pImpl)
756 : {
757 : #if OSL_DEBUG_LEVEL > 0
758 : static size_t nCallsSystem = 0;
759 : ++nCallsSystem;
760 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
761 : #endif
762 166569 : return pImpl;
763 : }
764 487 : if (!mbInitializedLangID)
765 : {
766 487 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
767 487 : mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
768 : SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
769 : }
770 : }
771 :
772 4054558 : if (mbInitializedLangID)
773 : {
774 2989184 : if (mnLangID == LANGUAGE_DONTKNOW)
775 : {
776 : // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
777 : // conversion attempts. At the same time provide a central breakpoint
778 : // to inspect such places.
779 1626352 : LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
780 1626352 : if (!rDontKnow)
781 296 : rDontKnow.reset( new LanguageTagImpl( *this));
782 1626352 : pImpl = rDontKnow;
783 : #if OSL_DEBUG_LEVEL > 0
784 : static size_t nCallsDontKnow = 0;
785 : ++nCallsDontKnow;
786 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
787 : #endif
788 1626352 : return pImpl;
789 : }
790 : else
791 : {
792 : // A great share are calls for a system equal locale.
793 1362832 : pImpl = theSystemLocale::get();
794 1362832 : if (pImpl && pImpl->mnLangID == mnLangID)
795 : {
796 : #if OSL_DEBUG_LEVEL > 0
797 : static size_t nCallsSystemEqual = 0;
798 : ++nCallsSystemEqual;
799 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
800 : << " system equal LangID calls");
801 : #endif
802 867896 : return pImpl;
803 : }
804 : }
805 : }
806 :
807 : // Force Bcp47 if not LangID.
808 1560310 : if (!mbInitializedLangID && !mbInitializedBcp47 && mbInitializedLocale)
809 : {
810 659091 : maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
811 659091 : mbInitializedBcp47 = !maBcp47.isEmpty();
812 : }
813 :
814 1560310 : if (mbInitializedBcp47)
815 : {
816 : // A great share are calls for a system equal locale.
817 1065704 : pImpl = theSystemLocale::get();
818 1065704 : if (pImpl && pImpl->maBcp47 == maBcp47)
819 : {
820 : #if OSL_DEBUG_LEVEL > 0
821 : static size_t nCallsSystemEqual = 0;
822 : ++nCallsSystemEqual;
823 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
824 : #endif
825 427486 : return pImpl;
826 : }
827 : }
828 :
829 : #if OSL_DEBUG_LEVEL > 0
830 : static size_t nCallsNonSystem = 0;
831 : ++nCallsNonSystem;
832 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
833 : #endif
834 :
835 2265648 : osl::MutexGuard aGuard( theMutex::get());
836 :
837 : #if OSL_DEBUG_LEVEL > 0
838 : static long nRunning = 0;
839 : // Entering twice here is ok, which is needed for fallback init in
840 : // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
841 : // everything else is suspicious.
842 : SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
843 : << maBcp47 << "' 0x" << ::std::hex << mnLangID );
844 : struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
845 : #endif
846 :
847 : // Prefer LangID map as find+insert needs less comparison work.
848 1132824 : if (mbInitializedLangID)
849 : {
850 494936 : MapLangID& rMap = theMapLangID::get();
851 494936 : MapLangID::const_iterator it( rMap.find( mnLangID));
852 494936 : if (it != rMap.end())
853 : {
854 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
855 484452 : pImpl = (*it).second;
856 : }
857 : else
858 : {
859 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
860 10484 : pImpl.reset( new LanguageTagImpl( *this));
861 10484 : rMap.insert( ::std::make_pair( mnLangID, pImpl));
862 : // Try round-trip.
863 10484 : if (!pImpl->mbInitializedLocale)
864 10462 : pImpl->convertLangToLocale();
865 10484 : LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
866 : // If round-trip is identical cross-insert to Bcp47 map.
867 10484 : if (nLang == pImpl->mnLangID)
868 : {
869 9850 : if (!pImpl->mbInitializedBcp47)
870 9828 : pImpl->convertLocaleToBcp47();
871 : ::std::pair< MapBcp47::const_iterator, bool > res(
872 9850 : theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
873 9850 : if (res.second)
874 : {
875 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
876 : }
877 : else
878 : {
879 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
880 : << ::std::hex << (*res.first).second->mnLangID);
881 : }
882 : }
883 : else
884 : {
885 634 : if (!pImpl->mbInitializedBcp47)
886 634 : pImpl->convertLocaleToBcp47();
887 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
888 : }
889 : }
890 : }
891 637888 : else if (!maBcp47.isEmpty())
892 : {
893 637888 : MapBcp47& rMap = theMapBcp47::get();
894 637888 : MapBcp47::const_iterator it( rMap.find( maBcp47));
895 637888 : if (it != rMap.end())
896 : {
897 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
898 396794 : pImpl = (*it).second;
899 : }
900 : else
901 : {
902 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
903 241094 : pImpl.reset( new LanguageTagImpl( *this));
904 241094 : ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
905 : // If changed after canonicalize() also add the resulting tag to
906 : // the map.
907 241094 : if (pImpl->synCanonicalize())
908 : {
909 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
910 : ::std::pair< MapBcp47::const_iterator, bool > insCanon(
911 23894 : rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
912 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
913 : << "inserted '" << pImpl->maBcp47 << "'");
914 : // If the canonicalized tag already existed (was not inserted)
915 : // and impls are different, make this impl that impl and skip
916 : // the rest if that LangID is present as well. The existing
917 : // entry may or may not be different, it may even be strictly
918 : // identical to this if it differs only in case (e.g. ko-kr =>
919 : // ko-KR) which was corrected in canonicalize() hence also in
920 : // the map entry but comparison is case insensitive and found
921 : // it again.
922 23894 : if (!insCanon.second && (*insCanon.first).second != pImpl)
923 : {
924 16423 : (*insOrig.first).second = pImpl = (*insCanon.first).second;
925 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
926 : << ::std::hex << pImpl->mnLangID);
927 : }
928 : }
929 241094 : if (!pImpl->mbInitializedLangID)
930 : {
931 : // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
932 224671 : if (!pImpl->mbInitializedLocale)
933 224493 : pImpl->convertBcp47ToLocale();
934 224671 : if (!pImpl->mbInitializedLangID)
935 224671 : pImpl->convertLocaleToLang( true);
936 : // Unconditionally insert (round-trip is possible) for
937 : // on-the-fly IDs and (generated or not) suggested IDs.
938 224671 : bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
939 224671 : OUString aBcp47;
940 224671 : if (!bInsert)
941 : {
942 214527 : if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
943 : {
944 : // May have involved canonicalize(), so compare with
945 : // pImpl->maBcp47 instead of maBcp47!
946 429038 : aBcp47 = LanguageTagImpl::convertToBcp47(
947 429038 : MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
948 214519 : bInsert = (aBcp47 == pImpl->maBcp47);
949 : }
950 : }
951 : // If round-trip is identical cross-insert to Bcp47 map.
952 224671 : if (bInsert)
953 : {
954 : ::std::pair< MapLangID::const_iterator, bool > res(
955 212315 : theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
956 212315 : if (res.second)
957 : {
958 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
959 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
960 : }
961 : else
962 : {
963 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
964 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
965 : << (*res.first).second->maBcp47 << "'");
966 : }
967 : }
968 : else
969 : {
970 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
971 : << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
972 : << aBcp47 << "'");
973 224671 : }
974 : }
975 : }
976 : }
977 : else
978 : {
979 : SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
980 0 : pImpl.reset( new LanguageTagImpl( *this));
981 : }
982 :
983 : // If we reach here for mbSystemLocale we didn't have theSystemLocale
984 : // above, so add it.
985 1132824 : if (mbSystemLocale && mbInitializedLangID)
986 : {
987 487 : theSystemLocale::get() = pImpl;
988 : SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
989 : << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
990 : }
991 :
992 1132824 : return pImpl;
993 : }
994 :
995 :
996 6527773 : LanguageTag::ImplPtr LanguageTag::getImpl() const
997 : {
998 6527773 : if (!mpImpl)
999 : {
1000 4220325 : mpImpl = registerImpl();
1001 4220325 : syncVarsFromRawImpl();
1002 : }
1003 6527773 : return mpImpl;
1004 : }
1005 :
1006 :
1007 5617621 : void LanguageTag::resetVars()
1008 : {
1009 5617621 : mpImpl.reset();
1010 5617621 : maLocale = lang::Locale();
1011 5617621 : maBcp47 = OUString();
1012 5617621 : mnLangID = LANGUAGE_SYSTEM;
1013 5617621 : mbSystemLocale = true;
1014 5617621 : mbInitializedBcp47 = false;
1015 5617621 : mbInitializedLocale = false;
1016 5617621 : mbInitializedLangID = false;
1017 5617621 : mbIsFallback = false;
1018 5617621 : }
1019 :
1020 :
1021 871 : LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
1022 : {
1023 871 : resetVars();
1024 871 : maBcp47 = rBcp47LanguageTag;
1025 871 : mbSystemLocale = rBcp47LanguageTag.isEmpty();
1026 871 : mbInitializedBcp47 = !mbSystemLocale;
1027 :
1028 871 : if (bCanonicalize)
1029 : {
1030 0 : getImpl()->canonicalize();
1031 : // Registration itself may already have canonicalized, so do an
1032 : // unconditional sync.
1033 0 : syncFromImpl();
1034 : }
1035 871 : return *this;
1036 : }
1037 :
1038 :
1039 666 : LanguageTag & LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
1040 : {
1041 666 : resetVars();
1042 666 : maLocale = rLocale;
1043 666 : mbSystemLocale = rLocale.Language.isEmpty();
1044 666 : mbInitializedLocale = !mbSystemLocale;
1045 666 : handleVendorVariant( maLocale);
1046 666 : return *this;
1047 : }
1048 :
1049 :
1050 5616084 : LanguageTag & LanguageTag::reset( LanguageType nLanguage )
1051 : {
1052 5616084 : resetVars();
1053 5616084 : mnLangID = nLanguage;
1054 5616084 : mbSystemLocale = nLanguage == LANGUAGE_SYSTEM;
1055 5616084 : mbInitializedLangID = !mbSystemLocale;
1056 5616084 : return *this;
1057 : }
1058 :
1059 :
// Canonicalize maBcp47 in place and report whether the string changed.
//
// While meIsLiblangtagNeeded is still DECISION_DONTKNOW this first tries to
// decide without liblangtag: a locale with an empty Variant, a locale that
// converts to a LangID other than LANGUAGE_DONTKNOW/LANGUAGE_SYSTEM, a tag
// found in getKnowns(), or an override delivered by
// MsLangId::Conversion::getOverride() each yield DECISION_NO. A locale or
// LangID that was derived only for this check (bTemporaryLocale /
// bTemporaryLangID) is reset again before leaving that section.
//
// If the decision is DECISION_NO, meIsValid is set to DECISION_YES and the
// function returns. Otherwise the tag is parsed and canonicalized with
// liblangtag and meIsValid records whether that succeeded.
//
// Returns true if maBcp47 was modified.
1060 267284 : bool LanguageTagImpl::canonicalize()
1061 : {
1062 : #ifdef erDEBUG
1063 : // dump once
1064 : struct dumper
1065 : {
1066 : lt_tag_t** mpp;
1067 : dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
1068 : ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
1069 : };
1070 : dumper aDumper( &mpImplLangtag);
1071 : #endif
1072 :
1073 267284 : bool bChanged = false;
1074 :
1075 : // Side effect: have maBcp47 in any case, resolved system.
1076 : // Some methods calling canonicalize() (or not calling it due to
1077 : // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
1078 : // meIsLiblangtagNeeded anywhere else than hereafter.
1079 267284 : getBcp47();
1080 :
1081 : // The simple cases and known locales don't need liblangtag processing,
1082 : // which also avoids loading liblangtag data on startup.
1083 267284 : if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
1084 : {
1085 251681 : bool bTemporaryLocale = false;
1086 251681 : bool bTemporaryLangID = false;
1087 251681 : if (!mbInitializedLocale && !mbInitializedLangID)
1088 : {
1089 240916 : if (mbSystemLocale)
1090 : {
1091 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1092 0 : mbInitializedLangID = true;
1093 : }
1094 : else
1095 : {
1096 : // Now this is getting funny.. we only have some BCP47 string
1097 : // and want to determine if parsing it would be possible
1098 : // without using liblangtag just to see if it is a simple known
1099 : // locale or could fall back to one.
1100 481832 : OUString aLanguage, aScript, aCountry, aVariants;
1101 240916 : Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
1102 240916 : if (eExt != EXTRACTED_NONE)
1103 : {
1104 240902 : if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
1105 : {
1106 : // Rebuild bcp47 with proper casing of tags.
1107 240874 : OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
1108 240874 : 1 + aCountry.getLength() + 1 + aVariants.getLength());
1109 240874 : aBuf.append( aLanguage);
1110 240874 : if (!aScript.isEmpty())
1111 28874 : aBuf.append("-" + aScript);
1112 240874 : if (!aCountry.isEmpty())
1113 208653 : aBuf.append("-" + aCountry);
1114 240874 : if (!aVariants.isEmpty())
1115 904 : aBuf.append("-" + aVariants);
1116 481748 : OUString aStr( aBuf.makeStringAndClear());
1117 :
1118 240874 : if (maBcp47 != aStr)
1119 : {
1120 2022 : maBcp47 = aStr;
1121 2022 : bChanged = true;
1122 240874 : }
1123 : }
1124 240902 : if (eExt == EXTRACTED_LSC && aScript.isEmpty())
1125 : {
1126 211096 : maLocale.Language = aLanguage;
1127 211096 : maLocale.Country = aCountry;
1128 : }
1129 : else
1130 : {
1131 29806 : maLocale.Language = I18NLANGTAG_QLT;
1132 29806 : maLocale.Country = aCountry;
1133 29806 : maLocale.Variant = maBcp47;
1134 : }
1135 240902 : bTemporaryLocale = mbInitializedLocale = true;
1136 240916 : }
1137 : }
1138 : }
1139 251681 : if (mbInitializedLangID && !mbInitializedLocale)
1140 : {
1141 : // Do not call getLocale() here because that prefers
1142 : // convertBcp47ToLocale() which would end up in recursion via
1143 : // isIsoLocale()!
1144 :
1145 : // Prepare to verify that we have a known locale, not just an
1146 : // arbitrary MS-LangID.
1147 0 : convertLangToLocale();
1148 : }
1149 251681 : if (mbInitializedLocale)
1150 : {
1151 251667 : if (maLocale.Variant.isEmpty())
1152 220313 : meIsLiblangtagNeeded = DECISION_NO; // per definition ll[l][-CC]
1153 : else
1154 : {
1155 31354 : if (!mbInitializedLangID)
1156 : {
1157 29806 : convertLocaleToLang( false);
1158 29806 : if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
1159 29806 : bTemporaryLangID = true;
1160 : }
1161 31354 : if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
1162 31326 : meIsLiblangtagNeeded = DECISION_NO; // known locale
1163 : else
1164 : {
1165 28 : const KnownTagSet& rKnowns = getKnowns();
1166 28 : if (rKnowns.find( maBcp47) != rKnowns.end())
1167 2 : meIsLiblangtagNeeded = DECISION_NO; // known fallback
1168 : }
1169 : }
1170 : // We may have an internal override "canonicalization".
1171 251667 : lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
1172 517840 : if (!aNew.Language.isEmpty() &&
1173 466173 : (aNew.Language != maLocale.Language ||
1174 443975 : aNew.Country != maLocale.Country ||
1175 221777 : aNew.Variant != maLocale.Variant))
1176 : {
1177 22198 : maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
1178 22198 : bChanged = true;
1179 22198 : meIsIsoLocale = DECISION_DONTKNOW;
1180 22198 : meIsIsoODF = DECISION_DONTKNOW;
1181 22198 : meIsLiblangtagNeeded = DECISION_NO; // known locale
1182 251667 : }
1183 : }
// Undo what was set up above only for the purpose of this decision.
1184 251681 : if (bTemporaryLocale)
1185 : {
1186 240902 : mbInitializedLocale = false;
1187 240902 : maLocale = lang::Locale();
1188 : }
1189 251681 : if (bTemporaryLangID)
1190 : {
1191 29806 : mbInitializedLangID = false;
1192 29806 : mnLangID = LANGUAGE_DONTKNOW;
1193 : }
1194 : }
1195 267284 : if (meIsLiblangtagNeeded == DECISION_NO)
1196 : {
1197 267240 : meIsValid = DECISION_YES; // really, known must be valid ...
1198 267240 : return bChanged; // that's it
1199 : }
1200 :
// Not decided as DECISION_NO above, so liblangtag does the work from here.
1201 44 : meIsLiblangtagNeeded = DECISION_YES;
1202 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
1203 :
1204 44 : if (!mpImplLangtag)
1205 : {
1206 40 : theDataRef::get().incRef();
1207 40 : mpImplLangtag = lt_tag_new();
1208 : }
1209 :
1210 44 : myLtError aError;
1211 :
1212 44 : if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
1213 : {
1214 32 : char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
1215 : SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
1216 32 : if (pTag)
1217 : {
1218 32 : OUString aNew( OUString::createFromAscii( pTag));
1219 : // Make the lt_tag_t follow the new string if different, which
1220 : // removes default script and such.
1221 32 : if (maBcp47 != aNew)
1222 : {
1223 4 : maBcp47 = aNew;
1224 4 : bChanged = true;
1225 4 : meIsIsoLocale = DECISION_DONTKNOW;
1226 4 : meIsIsoODF = DECISION_DONTKNOW;
1227 4 : if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
1228 : {
1229 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
1230 0 : free( pTag);
1231 0 : meIsValid = DECISION_NO;
1232 0 : return bChanged;
1233 : }
1234 : }
1235 32 : free( pTag);
1236 32 : meIsValid = DECISION_YES;
1237 32 : return bChanged;
1238 : }
1239 : }
1240 : else
1241 : {
1242 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
1243 : }
// Reached when parsing failed or lt_tag_canonicalize() returned NULL.
1244 12 : meIsValid = DECISION_NO;
1245 12 : return bChanged;
1246 : }
1247 :
1248 :
1249 1327831 : bool LanguageTagImpl::synCanonicalize()
1250 : {
1251 1327831 : bool bChanged = false;
1252 1327831 : if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
1253 : {
1254 251681 : bChanged = canonicalize();
1255 251681 : if (bChanged)
1256 : {
1257 24224 : if (mbInitializedLocale)
1258 330 : convertBcp47ToLocale();
1259 24224 : if (mbInitializedLangID)
1260 330 : convertBcp47ToLang();
1261 : }
1262 : }
1263 1327831 : return bChanged;
1264 : }
1265 :
1266 :
1267 629120 : void LanguageTag::syncFromImpl()
1268 : {
1269 629120 : ImplPtr xImpl = getImpl();
1270 629120 : LanguageTagImpl* pImpl = xImpl.get();
1271 629450 : bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
1272 1257910 : (mbInitializedLangID && mnLangID != pImpl->mnLangID));
1273 : SAL_INFO_IF( bRegister, "i18nlangtag",
1274 : "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
1275 : " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
1276 629120 : syncVarsFromRawImpl();
1277 629120 : if (bRegister)
1278 330 : mpImpl = registerImpl();
1279 629120 : }
1280 :
1281 :
1282 1753798 : void LanguageTag::syncVarsFromImpl() const
1283 : {
1284 1753798 : if (!mpImpl)
1285 1753798 : getImpl(); // with side effect syncVarsFromRawImpl()
1286 : else
1287 0 : syncVarsFromRawImpl();
1288 1753798 : }
1289 :
1290 :
1291 4849445 : void LanguageTag::syncVarsFromRawImpl() const
1292 : {
1293 : // Do not use getImpl() here.
1294 4849445 : LanguageTagImpl* pImpl = mpImpl.get();
1295 4849445 : if (!pImpl)
1296 4849445 : return;
1297 :
1298 : // Obviously only mutable variables.
1299 4849445 : mbInitializedBcp47 = pImpl->mbInitializedBcp47;
1300 4849445 : maBcp47 = pImpl->maBcp47;
1301 4849445 : mbInitializedLocale = pImpl->mbInitializedLocale;
1302 4849445 : maLocale = pImpl->maLocale;
1303 4849445 : mbInitializedLangID = pImpl->mbInitializedLangID;
1304 4849445 : mnLangID = pImpl->mnLangID;
1305 : }
1306 :
1307 :
1308 0 : bool LanguageTag::synCanonicalize()
1309 : {
1310 0 : bool bChanged = getImpl()->synCanonicalize();
1311 0 : if (bChanged)
1312 0 : syncFromImpl();
1313 0 : return bChanged;
1314 : }
1315 :
1316 :
1317 10758 : void LanguageTagImpl::convertLocaleToBcp47()
1318 : {
1319 10758 : if (mbSystemLocale && !mbInitializedLocale)
1320 0 : convertLangToLocale();
1321 :
1322 10758 : if (maLocale.Language.isEmpty())
1323 : {
1324 : // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
1325 : // locale via LanguageTag::convertToBcp47(LanguageType) and
1326 : // LanguageTag::convertToLocale(LanguageType) would instanciate another
1327 : // LanguageTag.
1328 0 : maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, true);
1329 : }
1330 10758 : if (maLocale.Language.isEmpty())
1331 : {
1332 0 : maBcp47 = OUString(); // bad luck
1333 : }
1334 10758 : else if (maLocale.Language == I18NLANGTAG_QLT)
1335 : {
1336 1626 : maBcp47 = maLocale.Variant;
1337 1626 : meIsIsoLocale = DECISION_NO;
1338 : }
1339 : else
1340 : {
1341 9132 : maBcp47 = LanguageTag::convertToBcp47( maLocale, true);
1342 : }
1343 10758 : mbInitializedBcp47 = true;
1344 10758 : }
1345 :
1346 :
1347 254807 : void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
1348 : {
1349 254807 : if (mbSystemLocale)
1350 : {
1351 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1352 : }
1353 : else
1354 : {
1355 254807 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
1356 254807 : if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
1357 : {
1358 7724 : if (isValidBcp47())
1359 : {
1360 : // For language-only (including script) look if we know some
1361 : // locale of that language and if so try to use the primary
1362 : // language ID of that instead of generating an on-the-fly ID.
1363 7716 : if (getCountry().isEmpty() && isIsoODF())
1364 : {
1365 7468 : lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
1366 : // 'en-US' is last resort, do not use except when looking
1367 : // for 'en'.
1368 7468 : if (aLoc.Language != "en" || getLanguage() == "en")
1369 : {
1370 7464 : mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
1371 7464 : if (mnLangID != LANGUAGE_DONTKNOW)
1372 7464 : mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
1373 7468 : }
1374 : }
1375 7716 : registerOnTheFly( mnLangID);
1376 : }
1377 : else
1378 : {
1379 : SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
1380 : << maBcp47 << "'");
1381 : }
1382 : }
1383 : }
1384 254807 : mbInitializedLangID = true;
1385 254807 : }
1386 :
1387 :
1388 0 : void LanguageTag::convertLocaleToLang()
1389 : {
1390 0 : getImpl()->convertLocaleToLang( true);
1391 0 : syncFromImpl();
1392 0 : }
1393 :
1394 :
1395 224823 : void LanguageTagImpl::convertBcp47ToLocale()
1396 : {
1397 224823 : bool bIso = isIsoLocale();
1398 224823 : if (bIso)
1399 : {
1400 200436 : maLocale.Language = getLanguageFromLangtag();
1401 200436 : maLocale.Country = getRegionFromLangtag();
1402 200436 : maLocale.Variant = OUString();
1403 : }
1404 : else
1405 : {
1406 24387 : maLocale.Language = I18NLANGTAG_QLT;
1407 24387 : maLocale.Country = getCountry();
1408 24387 : maLocale.Variant = maBcp47;
1409 : }
1410 224823 : mbInitializedLocale = true;
1411 224823 : }
1412 :
1413 :
1414 0 : void LanguageTag::convertBcp47ToLocale()
1415 : {
1416 0 : getImpl()->convertBcp47ToLocale();
1417 0 : syncFromImpl();
1418 0 : }
1419 :
1420 :
1421 330 : void LanguageTagImpl::convertBcp47ToLang()
1422 : {
1423 330 : if (mbSystemLocale)
1424 : {
1425 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1426 : }
1427 : else
1428 : {
1429 330 : if (!mbInitializedLocale)
1430 0 : convertBcp47ToLocale();
1431 330 : convertLocaleToLang( true);
1432 : }
1433 330 : mbInitializedLangID = true;
1434 330 : }
1435 :
1436 :
1437 0 : void LanguageTag::convertBcp47ToLang()
1438 : {
1439 0 : getImpl()->convertBcp47ToLang();
1440 0 : syncFromImpl();
1441 0 : }
1442 :
1443 :
1444 10758 : void LanguageTagImpl::convertLangToLocale()
1445 : {
1446 10758 : if (mbSystemLocale && !mbInitializedLangID)
1447 : {
1448 0 : mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
1449 0 : mbInitializedLangID = true;
1450 : }
1451 : // Resolve system here! The original is remembered as mbSystemLocale.
1452 10758 : maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
1453 10758 : mbInitializedLocale = true;
1454 10758 : }
1455 :
1456 :
1457 0 : void LanguageTag::convertLangToLocale()
1458 : {
1459 0 : getImpl()->convertLangToLocale();
1460 0 : syncFromImpl();
1461 0 : }
1462 :
1463 :
1464 296 : void LanguageTagImpl::convertLangToBcp47()
1465 : {
1466 296 : if (!mbInitializedLocale)
1467 296 : convertLangToLocale();
1468 296 : convertLocaleToBcp47();
1469 296 : mbInitializedBcp47 = true;
1470 296 : }
1471 :
1472 :
1473 306 : void LanguageTag::convertFromRtlLocale()
1474 : {
1475 : // The rtl_Locale follows the Open Group Base Specification,
1476 : // 8.2 Internationalization Variables
1477 : // language[_territory][.codeset][@modifier]
1478 : // On GNU/Linux systems usually being glibc locales.
1479 : // sal/osl/unx/nlsupport.c _parse_locale() parses them into
1480 : // Language: language 2 or 3 alpha code
1481 : // Country: [territory] 2 alpha code
1482 : // Variant: [.codeset][@modifier]
1483 : // Variant effectively contains anything that follows the territory, not
1484 : // looking for '.' dot delimiter or '@' modifier content.
1485 306 : if (!maLocale.Variant.isEmpty())
1486 : {
1487 612 : OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
1488 306 : RTL_TEXTENCODING_UTF8);
1489 : /* FIXME: let liblangtag parse this entirely with
1490 : * lt_tag_convert_from_locale() but that needs a patch to pass the
1491 : * string. */
1492 : #if 0
1493 : myLtError aError;
1494 : theDataRef::get().incRef();
1495 : mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
1496 : maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
1497 : mbInitializedBcp47 = true;
1498 : #else
1499 306 : mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
1500 306 : if (mnLangID == LANGUAGE_DONTKNOW)
1501 : {
1502 : SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
1503 0 : mnLangID = LANGUAGE_ENGLISH_US; // we need _something_ here
1504 : }
1505 306 : mbInitializedLangID = true;
1506 : #endif
1507 306 : maLocale = lang::Locale();
1508 306 : mbInitializedLocale = false;
1509 : }
1510 306 : }
1511 :
1512 :
1513 267562 : const OUString & LanguageTagImpl::getBcp47() const
1514 : {
1515 267562 : if (!mbInitializedBcp47)
1516 : {
1517 296 : if (mbInitializedLocale)
1518 0 : const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
1519 : else
1520 296 : const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
1521 : }
1522 267562 : return maBcp47;
1523 : }
1524 :
1525 :
1526 487559 : const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
1527 : {
1528 487559 : if (!bResolveSystem && mbSystemLocale)
1529 12531 : return theEmptyBcp47::get();
1530 475028 : if (!mbInitializedBcp47)
1531 402241 : syncVarsFromImpl();
1532 475028 : if (!mbInitializedBcp47)
1533 : {
1534 278 : getImpl()->getBcp47();
1535 278 : const_cast<LanguageTag*>(this)->syncFromImpl();
1536 : }
1537 475028 : return maBcp47;
1538 : }
1539 :
1540 :
1541 411581 : OUString LanguageTagImpl::getLanguageFromLangtag()
1542 : {
1543 411581 : OUString aLanguage;
1544 411581 : synCanonicalize();
1545 411581 : if (maBcp47.isEmpty())
1546 0 : return aLanguage;
1547 411581 : if (mpImplLangtag)
1548 : {
1549 34 : const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
1550 : SAL_WARN_IF( !pLangT, "i18nlangtag",
1551 : "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
1552 34 : if (!pLangT)
1553 22 : return aLanguage;
1554 12 : const char* pLang = lt_lang_get_tag( pLangT);
1555 : SAL_WARN_IF( !pLang, "i18nlangtag",
1556 : "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
1557 12 : if (pLang)
1558 12 : aLanguage = OUString::createFromAscii( pLang);
1559 : }
1560 : else
1561 : {
1562 411547 : if (mbCachedLanguage || cacheSimpleLSCV())
1563 411543 : aLanguage = maCachedLanguage;
1564 : }
1565 411559 : return aLanguage;
1566 : }
1567 :
1568 :
1569 32 : OUString LanguageTagImpl::getScriptFromLangtag()
1570 : {
1571 32 : OUString aScript;
1572 32 : synCanonicalize();
1573 32 : if (maBcp47.isEmpty())
1574 0 : return aScript;
1575 32 : if (mpImplLangtag)
1576 : {
1577 30 : const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
1578 : // pScriptT==NULL is valid for default scripts
1579 30 : if (!pScriptT)
1580 30 : return aScript;
1581 0 : const char* pScript = lt_script_get_tag( pScriptT);
1582 : SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
1583 0 : if (pScript)
1584 0 : aScript = OUString::createFromAscii( pScript);
1585 : }
1586 : else
1587 : {
1588 2 : if (mbCachedScript || cacheSimpleLSCV())
1589 2 : aScript = maCachedScript;
1590 : }
1591 2 : return aScript;
1592 : }
1593 :
1594 :
1595 434030 : OUString LanguageTagImpl::getRegionFromLangtag()
1596 : {
1597 434030 : OUString aRegion;
1598 434030 : synCanonicalize();
1599 434030 : if (maBcp47.isEmpty())
1600 0 : return aRegion;
1601 434030 : if (mpImplLangtag)
1602 : {
1603 50 : const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
1604 : // pRegionT==NULL is valid for language only tags, rough check here
1605 : // that does not take sophisticated tags into account that actually
1606 : // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
1607 : // that ll-CC and lll-CC actually fail.
1608 : SAL_WARN_IF( !pRegionT &&
1609 : maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
1610 : maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
1611 : "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
1612 50 : if (!pRegionT)
1613 44 : return aRegion;
1614 6 : const char* pRegion = lt_region_get_tag( pRegionT);
1615 : SAL_WARN_IF( !pRegion, "i18nlangtag",
1616 : "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
1617 6 : if (pRegion)
1618 6 : aRegion = OUString::createFromAscii( pRegion);
1619 : }
1620 : else
1621 : {
1622 433980 : if (mbCachedCountry || cacheSimpleLSCV())
1623 433974 : aRegion = maCachedCountry;
1624 : }
1625 433986 : return aRegion;
1626 : }
1627 :
1628 :
1629 2 : OUString LanguageTagImpl::getVariantsFromLangtag()
1630 : {
1631 2 : OUString aVariants;
1632 2 : synCanonicalize();
1633 2 : if (maBcp47.isEmpty())
1634 0 : return aVariants;
1635 2 : if (mpImplLangtag)
1636 : {
1637 2 : const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
1638 4 : for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
1639 : {
1640 2 : const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
1641 2 : if (pVariantT)
1642 : {
1643 2 : const char* p = lt_variant_get_tag( pVariantT);
1644 2 : if (p)
1645 : {
1646 2 : if (aVariants.isEmpty())
1647 2 : aVariants = OUString::createFromAscii( p);
1648 : else
1649 0 : aVariants += "-" + OUString::createFromAscii( p);
1650 : }
1651 : }
1652 : }
1653 : }
1654 : else
1655 : {
1656 0 : if (mbCachedVariants || cacheSimpleLSCV())
1657 0 : aVariants = maCachedVariants;
1658 : }
1659 2 : return aVariants;
1660 : }
1661 :
1662 :
1663 4739789 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
1664 : {
1665 4739789 : if (!bResolveSystem && mbSystemLocale)
1666 900 : return theEmptyLocale::get();
1667 4738889 : if (!mbInitializedLocale)
1668 719401 : syncVarsFromImpl();
1669 4738889 : if (!mbInitializedLocale)
1670 : {
1671 0 : if (mbInitializedBcp47)
1672 0 : const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
1673 : else
1674 0 : const_cast<LanguageTag*>(this)->convertLangToLocale();
1675 : }
1676 4738889 : return maLocale;
1677 : }
1678 :
1679 :
1680 18155968 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
1681 : {
1682 18155968 : if (!bResolveSystem && mbSystemLocale)
1683 1268172 : return LANGUAGE_SYSTEM;
1684 16887796 : if (!mbInitializedLangID)
1685 632156 : syncVarsFromImpl();
1686 16887796 : if (!mbInitializedLangID)
1687 : {
1688 0 : if (mbInitializedBcp47)
1689 0 : const_cast<LanguageTag*>(this)->convertBcp47ToLang();
1690 : else
1691 : {
1692 0 : const_cast<LanguageTag*>(this)->convertLocaleToLang();
1693 :
1694 : /* Resolve a locale only unknown due to some redundant information,
1695 : * like 'de-Latn-DE' with script tag. Never call canonicalize()
1696 : * from within convert...() methods due to possible recursion, so
1697 : * do it here. */
1698 0 : if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
1699 0 : const_cast<LanguageTag*>(this)->synCanonicalize();
1700 : }
1701 : }
1702 16887796 : return mnLangID;
1703 : }
1704 :
1705 :
1706 0 : void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
1707 : {
1708 : // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
1709 : // and getCountry() to work correctly in this context.
1710 0 : if (isIsoODF())
1711 : {
1712 0 : rLanguage = getLanguage();
1713 0 : rScript = getScript();
1714 0 : rCountry = getCountry();
1715 : }
1716 : else
1717 : {
1718 0 : rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
1719 0 : rScript = (LanguageTag::isIsoScript( getScript()) ? getScript() : OUString());
1720 0 : rCountry = (LanguageTag::isIsoCountry( getCountry()) ? getCountry() : OUString());
1721 : }
1722 0 : }
1723 :
1724 :
1725 : namespace
1726 : {
1727 :
1728 480963 : inline bool isLowerAscii( sal_Unicode c )
1729 : {
1730 480963 : return 'a' <= c && c <= 'z';
1731 : }
1732 :
1733 405756 : inline bool isUpperAscii( sal_Unicode c )
1734 : {
1735 405756 : return 'A' <= c && c <= 'Z';
1736 : }
1737 :
1738 : }
1739 :
1740 :
1741 : // static
1742 209227 : bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
1743 : {
1744 : /* TODO: ignore case? For now let's see where rubbish is used. */
1745 : bool b2chars;
1746 690258 : if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
1747 836770 : isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
1748 62577 : (b2chars || isLowerAscii( rLanguage[2])))
1749 209181 : return true;
1750 : SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
1751 : (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
1752 : (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
1753 : "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
1754 46 : return false;
1755 : }
1756 :
1757 :
1758 : // static
1759 233594 : bool LanguageTag::isIsoCountry( const OUString& rRegion )
1760 : {
1761 : /* TODO: ignore case? For now let's see where rubbish is used. */
1762 670062 : if (rRegion.isEmpty() ||
1763 405754 : (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
1764 233588 : return true;
1765 : SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
1766 : "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
1767 6 : return false;
1768 : }
1769 :
1770 :
1771 : // static
1772 7548 : bool LanguageTag::isIsoScript( const OUString& rScript )
1773 : {
1774 : /* TODO: ignore case? For now let's see where rubbish is used. */
1775 15104 : if (rScript.isEmpty() ||
1776 16 : (rScript.getLength() == 4 &&
1777 24 : isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
1778 16 : isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
1779 7548 : return true;
1780 : SAL_WARN_IF( rScript.getLength() == 4 &&
1781 : (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
1782 : isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
1783 : "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
1784 0 : return false;
1785 : }
1786 :
1787 :
1788 219498 : OUString LanguageTagImpl::getLanguage() const
1789 : {
1790 219498 : if (!mbCachedLanguage)
1791 : {
1792 211145 : maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
1793 211145 : mbCachedLanguage = true;
1794 : }
1795 219498 : return maCachedLanguage;
1796 : }
1797 :
1798 :
1799 2816694 : OUString LanguageTag::getLanguage() const
1800 : {
1801 2816694 : ImplPtr pImpl = getImpl();
1802 2816694 : if (pImpl->mbCachedLanguage)
1803 2806427 : return pImpl->maCachedLanguage;
1804 20534 : OUString aRet( pImpl->getLanguage());
1805 10267 : const_cast<LanguageTag*>(this)->syncFromImpl();
1806 2826961 : return aRet;
1807 : }
1808 :
1809 :
1810 7548 : OUString LanguageTagImpl::getScript() const
1811 : {
1812 7548 : if (!mbCachedScript)
1813 : {
1814 32 : maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
1815 32 : mbCachedScript = true;
1816 : }
1817 7548 : return maCachedScript;
1818 : }
1819 :
1820 :
1821 47781 : OUString LanguageTag::getScript() const
1822 : {
1823 47781 : ImplPtr pImpl = getImpl();
1824 47781 : if (pImpl->mbCachedScript)
1825 47781 : return pImpl->maCachedScript;
1826 0 : OUString aRet( pImpl->getScript());
1827 0 : const_cast<LanguageTag*>(this)->syncFromImpl();
1828 47781 : return aRet;
1829 : }
1830 :
1831 :
1832 10914 : OUString LanguageTag::getLanguageAndScript() const
1833 : {
1834 10914 : OUString aLanguageScript( getLanguage());
1835 21828 : OUString aScript( getScript());
1836 10914 : if (!aScript.isEmpty())
1837 : {
1838 6 : aLanguageScript += "-" + aScript;
1839 : }
1840 21828 : return aLanguageScript;
1841 : }
1842 :
1843 :
1844 32125 : OUString LanguageTagImpl::getCountry() const
1845 : {
1846 32125 : if (!mbCachedCountry)
1847 : {
1848 24413 : maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1849 24413 : if (!LanguageTag::isIsoCountry( maCachedCountry))
1850 2 : maCachedCountry = OUString();
1851 24413 : mbCachedCountry = true;
1852 : }
1853 32125 : return maCachedCountry;
1854 : }
1855 :
1856 :
1857 622481 : OUString LanguageTag::getCountry() const
1858 : {
1859 622481 : ImplPtr pImpl = getImpl();
1860 622481 : if (pImpl->mbCachedCountry)
1861 622459 : return pImpl->maCachedCountry;
1862 44 : OUString aRet( pImpl->getCountry());
1863 22 : const_cast<LanguageTag*>(this)->syncFromImpl();
1864 622503 : return aRet;
1865 : }
1866 :
1867 :
1868 209181 : OUString LanguageTagImpl::getRegion() const
1869 : {
1870 209181 : return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
1871 : }
1872 :
1873 :
1874 10 : OUString LanguageTagImpl::getVariants() const
1875 : {
1876 10 : if (!mbCachedVariants)
1877 : {
1878 2 : maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
1879 2 : mbCachedVariants = true;
1880 : }
1881 10 : return maCachedVariants;
1882 : }
1883 :
1884 :
1885 39068 : OUString LanguageTag::getVariants() const
1886 : {
1887 39068 : ImplPtr pImpl = getImpl();
1888 39068 : if (pImpl->mbCachedVariants)
1889 39068 : return pImpl->maCachedVariants;
1890 0 : OUString aRet( pImpl->getVariants());
1891 0 : const_cast<LanguageTag*>(this)->syncFromImpl();
1892 39068 : return aRet;
1893 : }
1894 :
1895 :
1896 0 : OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
1897 : {
1898 0 : OUString aRet;
1899 0 : if (isIsoLocale())
1900 : {
1901 0 : OUString aCountry( getCountry());
1902 0 : if (aCountry.isEmpty())
1903 0 : aRet = getLanguage() + rEncoding;
1904 : else
1905 0 : aRet = getLanguage() + "_" + aCountry + rEncoding;
1906 : }
1907 : else
1908 : {
1909 : /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
1910 : * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
1911 : * So far no code was prepared for anything else than a simple
1912 : * language_country locale so we don't lose anything here right now.
1913 : * */
1914 : }
1915 0 : return aRet;
1916 : }
1917 :
1918 :
1919 40670 : bool LanguageTagImpl::hasScript() const
1920 : {
1921 40670 : if (!mbCachedScript)
1922 0 : getScript();
1923 40670 : return !maCachedScript.isEmpty();
1924 : }
1925 :
1926 :
1927 40670 : bool LanguageTag::hasScript() const
1928 : {
1929 40670 : bool bRet = getImpl()->hasScript();
1930 40670 : const_cast<LanguageTag*>(this)->syncFromImpl();
1931 40670 : return bRet;
1932 : }
1933 :
1934 :
1935 235164 : bool LanguageTagImpl::cacheSimpleLSCV()
1936 : {
1937 470328 : OUString aLanguage, aScript, aCountry, aVariants;
1938 235164 : Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
1939 235164 : bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
1940 235164 : if (bRet)
1941 : {
1942 235154 : maCachedLanguage = aLanguage;
1943 235154 : maCachedScript = aScript;
1944 235154 : maCachedCountry = aCountry;
1945 235154 : maCachedVariants = aVariants;
1946 235154 : mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
1947 : }
1948 470328 : return bRet;
1949 : }
1950 :
1951 :
1952 790817 : bool LanguageTagImpl::isIsoLocale() const
1953 : {
1954 790817 : if (meIsIsoLocale == DECISION_DONTKNOW)
1955 : {
1956 233554 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1957 : // It must be at most ll-CC or lll-CC
1958 : // Do not use getCountry() here, use getRegion() instead.
1959 467108 : meIsIsoLocale = ((maBcp47.isEmpty() ||
1960 1303832 : (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
1961 1118974 : LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
1962 : }
1963 790817 : return meIsIsoLocale == DECISION_YES;
1964 : }
1965 :
1966 :
1967 558456 : bool LanguageTag::isIsoLocale() const
1968 : {
1969 558456 : bool bRet = getImpl()->isIsoLocale();
1970 558456 : const_cast<LanguageTag*>(this)->syncFromImpl();
1971 558456 : return bRet;
1972 : }
1973 :
1974 :
1975 8212 : bool LanguageTagImpl::isIsoODF() const
1976 : {
1977 8212 : if (meIsIsoODF == DECISION_DONTKNOW)
1978 : {
1979 7538 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
1980 7538 : if (!LanguageTag::isIsoScript( getScript()))
1981 : {
1982 0 : meIsIsoODF = DECISION_NO;
1983 0 : return false;
1984 : }
1985 : // The usual case is lll-CC so simply check that first.
1986 7538 : if (isIsoLocale())
1987 : {
1988 7500 : meIsIsoODF = DECISION_YES;
1989 7500 : return true;
1990 : }
1991 : // If this is not ISO locale for which script must not exist it can
1992 : // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
1993 : // ll-vvvvvvvv
1994 168 : meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
1995 92 : LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
1996 104 : getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
1997 : }
1998 712 : return meIsIsoODF == DECISION_YES;
1999 : }
2000 :
2001 :
2002 716 : bool LanguageTag::isIsoODF() const
2003 : {
2004 716 : bool bRet = getImpl()->isIsoODF();
2005 716 : const_cast<LanguageTag*>(this)->syncFromImpl();
2006 716 : return bRet;
2007 : }
2008 :
2009 :
2010 10832 : bool LanguageTagImpl::isValidBcp47() const
2011 : {
2012 10832 : if (meIsValid == DECISION_DONTKNOW)
2013 : {
2014 0 : const_cast<LanguageTagImpl*>(this)->synCanonicalize();
2015 : SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
2016 : "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
2017 : }
2018 10832 : return meIsValid == DECISION_YES;
2019 : }
2020 :
2021 :
2022 3108 : bool LanguageTag::isValidBcp47() const
2023 : {
2024 3108 : bool bRet = getImpl()->isValidBcp47();
2025 3108 : const_cast<LanguageTag*>(this)->syncFromImpl();
2026 3108 : return bRet;
2027 : }
2028 :
2029 :
2030 :
2031 :
2032 5597 : LanguageTag & LanguageTag::makeFallback()
2033 : {
2034 5597 : if (!mbIsFallback)
2035 : {
2036 5597 : const lang::Locale& rLocale1 = getLocale( true);
2037 5597 : lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
2038 16791 : if ( rLocale1.Language != aLocale2.Language ||
2039 10530 : rLocale1.Country != aLocale2.Country ||
2040 4933 : rLocale1.Variant != aLocale2.Variant)
2041 : {
2042 664 : if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
2043 : {
2044 : // "en-US" is the last resort fallback, try if we get a better
2045 : // one for the fallback hierarchy of a non-"en" locale.
2046 0 : ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
2047 0 : for (::std::vector< OUString >::const_iterator it( aFallbacks.begin()); it != aFallbacks.end(); ++it)
2048 : {
2049 0 : lang::Locale aLocale3( LanguageTag( *it).getLocale());
2050 0 : aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
2051 0 : if (aLocale2.Language != "en" || aLocale2.Country != "US")
2052 0 : break; // for, success
2053 0 : }
2054 : }
2055 : SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
2056 : rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
2057 : aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
2058 664 : reset( aLocale2);
2059 : }
2060 5597 : mbIsFallback = true;
2061 : }
2062 5597 : return *this;
2063 : }
2064 :
2065 :
2066 : /* TODO: maybe this now could take advantage of the mnOverride field in
2067 : * isolang.cxx entries and search for kSAME instead of harcoded special
2068 : * fallbacks. Though iterating through those tables would be slower and even
2069 : * then there would be some special cases, but we wouldn't lack entries that
2070 : * were missed out. */
2071 335768 : ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
2072 : {
2073 335768 : ::std::vector< OUString > aVec;
2074 671536 : OUString aLanguage( getLanguage());
2075 671536 : OUString aCountry( getCountry());
2076 335768 : if (isIsoLocale())
2077 : {
2078 296704 : if (!aCountry.isEmpty())
2079 : {
2080 243588 : if (bIncludeFullBcp47)
2081 215032 : aVec.push_back( aLanguage + "-" + aCountry);
2082 243588 : if (aLanguage == "zh")
2083 : {
2084 : // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
2085 : // list zh-CN.
2086 2646 : if (aCountry == "HK" || aCountry == "MO")
2087 882 : aVec.push_back( aLanguage + "-TW");
2088 1764 : else if (aCountry != "CN")
2089 882 : aVec.push_back( aLanguage + "-CN");
2090 2646 : aVec.push_back( aLanguage);
2091 : }
2092 240942 : else if (aLanguage == "sh")
2093 : {
2094 : // Manual list instead of calling
2095 : // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
2096 : // that would also include "sh-*" again.
2097 0 : aVec.push_back( "sr-Latn-" + aCountry);
2098 0 : aVec.push_back( "sr-Latn");
2099 0 : aVec.push_back( "sh"); // legacy with script, before default script with country
2100 0 : aVec.push_back( "sr-" + aCountry);
2101 0 : aVec.push_back( "sr");
2102 : }
2103 240942 : else if (aLanguage == "ca" && aCountry == "XV")
2104 : {
2105 0 : ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
2106 0 : aVec.insert( aVec.end(), aRep.begin(), aRep.end());
2107 : // Already includes 'ca' language fallback.
2108 : }
2109 240942 : else if (aLanguage == "ku")
2110 : {
2111 0 : if (aCountry == "TR" || aCountry == "SY")
2112 : {
2113 0 : aVec.push_back( "kmr-Latn-" + aCountry);
2114 0 : aVec.push_back( "kmr-" + aCountry);
2115 0 : aVec.push_back( "kmr-Latn");
2116 0 : aVec.push_back( "kmr");
2117 0 : aVec.push_back( aLanguage);
2118 : }
2119 0 : else if (aCountry == "IQ" || aCountry == "IR")
2120 : {
2121 0 : aVec.push_back( "ckb-" + aCountry);
2122 0 : aVec.push_back( "ckb");
2123 : }
2124 : }
2125 240942 : else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
2126 : {
2127 0 : aVec.push_back( "ku-Latn-" + aCountry);
2128 0 : aVec.push_back( "ku-" + aCountry);
2129 0 : aVec.push_back( aLanguage);
2130 0 : aVec.push_back( "ku");
2131 : }
2132 240942 : else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
2133 : {
2134 2646 : aVec.push_back( "ku-Arab-" + aCountry);
2135 2646 : aVec.push_back( "ku-" + aCountry);
2136 2646 : aVec.push_back( aLanguage);
2137 : // not 'ku' only, that was used for Latin script
2138 : }
2139 : else
2140 238296 : aVec.push_back( aLanguage);
2141 : }
2142 : else
2143 : {
2144 53116 : if (bIncludeFullBcp47)
2145 53112 : aVec.push_back( aLanguage);
2146 53116 : if (aLanguage == "sh")
2147 : {
2148 0 : aVec.push_back( "sr-Latn");
2149 0 : aVec.push_back( "sr");
2150 : }
2151 53116 : else if (aLanguage == "pli")
2152 : {
2153 : // a special case for Pali dictionary, see fdo#41599
2154 0 : aVec.push_back( "pi-Latn");
2155 0 : aVec.push_back( "pi");
2156 : }
2157 : }
2158 296704 : return aVec;
2159 : }
2160 :
2161 39064 : getBcp47(); // have maBcp47 now
2162 39064 : if (bIncludeFullBcp47)
2163 39064 : aVec.push_back( maBcp47);
2164 78128 : OUString aScript;
2165 78128 : OUString aVariants( getVariants());
2166 78128 : OUString aTmp;
2167 39064 : if (hasScript())
2168 : {
2169 36849 : aScript = getScript();
2170 36849 : bool bHaveLanguageScriptVariant = false;
2171 36849 : if (!aCountry.isEmpty())
2172 : {
2173 24854 : if (!aVariants.isEmpty())
2174 : {
2175 0 : aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
2176 0 : if (aTmp != maBcp47)
2177 0 : aVec.push_back( aTmp);
2178 : // Language with variant but without country before language
2179 : // without variant but with country.
2180 0 : aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2181 0 : if (aTmp != maBcp47)
2182 0 : aVec.push_back( aTmp);
2183 0 : bHaveLanguageScriptVariant = true;
2184 : }
2185 24854 : aTmp = aLanguage + "-" + aScript + "-" + aCountry;
2186 24854 : if (aTmp != maBcp47)
2187 0 : aVec.push_back( aTmp);
2188 24854 : if (aLanguage == "sr" && aScript == "Latn")
2189 : {
2190 : // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
2191 6178 : if (aCountry == "CS")
2192 : {
2193 1347 : aVec.push_back( "sr-Latn-YU");
2194 1347 : aVec.push_back( "sh-CS");
2195 1347 : aVec.push_back( "sh-YU");
2196 : }
2197 : else
2198 4831 : aVec.push_back( "sh-" + aCountry);
2199 : }
2200 18676 : else if (aLanguage == "pi" && aScript == "Latn")
2201 0 : aVec.push_back( "pli"); // a special case for Pali dictionary, see fdo#41599
2202 18676 : else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
2203 0 : aVec.push_back( "ku-" + aCountry);
2204 : }
2205 36849 : if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
2206 : {
2207 0 : aTmp = aLanguage + "-" + aScript + "-" + aVariants;
2208 0 : if (aTmp != maBcp47)
2209 0 : aVec.push_back( aTmp);
2210 : }
2211 36849 : aTmp = aLanguage + "-" + aScript;
2212 36849 : if (aTmp != maBcp47)
2213 24854 : aVec.push_back( aTmp);
2214 :
2215 : // 'sh' actually denoted a script, so have it here instead of appended
2216 : // at the end as language-only.
2217 36849 : if (aLanguage == "sr" && aScript == "Latn")
2218 7501 : aVec.push_back( "sh");
2219 29348 : else if (aLanguage == "ku" && aScript == "Arab")
2220 0 : aVec.push_back( "ckb");
2221 : // 'ku' only denoted Latin script
2222 29348 : else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
2223 0 : aVec.push_back( "ku");
2224 : }
2225 39064 : bool bHaveLanguageVariant = false;
2226 39064 : if (!aCountry.isEmpty())
2227 : {
2228 27065 : if (!aVariants.isEmpty())
2229 : {
2230 2211 : aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
2231 2211 : if (aTmp != maBcp47)
2232 0 : aVec.push_back( aTmp);
2233 2211 : if (maBcp47 == "ca-ES-valencia")
2234 1768 : aVec.push_back( "ca-XV");
2235 : // Language with variant but without country before language
2236 : // without variant but with country.
2237 : // But only if variant is not from a grandfathered tag that
2238 : // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
2239 : // not.
2240 4422 : if (aVariants.getLength() >= 5 ||
2241 443 : (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2242 : {
2243 1768 : aTmp = aLanguage + "-" + aVariants;
2244 1768 : if (aTmp != maBcp47)
2245 1768 : aVec.push_back( aTmp);
2246 1768 : bHaveLanguageVariant = true;
2247 : }
2248 : }
2249 27065 : aTmp = aLanguage + "-" + aCountry;
2250 27065 : if (aTmp != maBcp47)
2251 27065 : aVec.push_back( aTmp);
2252 : }
2253 39064 : if (!aVariants.isEmpty() && !bHaveLanguageVariant)
2254 : {
2255 : // Only if variant is not from a grandfathered tag that wouldn't match
2256 : // the rules, i.e. "de-1901" is fine but "en-oed" is not.
2257 896 : if (aVariants.getLength() >= 5 ||
2258 447 : (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
2259 : {
2260 4 : aTmp = aLanguage + "-" + aVariants;
2261 4 : if (aTmp != maBcp47)
2262 0 : aVec.push_back( aTmp);
2263 : }
2264 : }
2265 :
2266 : // Insert legacy fallbacks with country before language-only, but only
2267 : // default script, script was handled already above.
2268 39064 : if (!aCountry.isEmpty())
2269 : {
2270 27065 : if (aLanguage == "sr" && aCountry == "CS")
2271 1347 : aVec.push_back( "sr-YU");
2272 : }
2273 :
2274 : // Original language-only.
2275 39064 : if (aLanguage != maBcp47)
2276 39064 : aVec.push_back( aLanguage);
2277 :
2278 39064 : return aVec;
2279 : }
2280 :
2281 :
2282 0 : bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
2283 : {
2284 : // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
2285 : // can use the operator==() optimization.
2286 0 : if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
2287 0 : return operator==( rLanguageTag);
2288 :
2289 : // Compare full language tag strings.
2290 0 : return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
2291 : }
2292 :
2293 :
2294 2239517 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
2295 : {
2296 2239517 : if (isSystemLocale() && rLanguageTag.isSystemLocale())
2297 96194 : return true; // both SYSTEM
2298 :
2299 : // No need to convert to BCP47 if both Lang-IDs are available.
2300 2143323 : if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
2301 : {
2302 : // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
2303 2107561 : return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
2304 : }
2305 :
2306 : // Compare full language tag strings but SYSTEM unresolved.
2307 35762 : return getBcp47( false) == rLanguageTag.getBcp47( false);
2308 : }
2309 :
2310 :
2311 2231478 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
2312 : {
2313 2231478 : return !operator==( rLanguageTag);
2314 : }
2315 :
2316 :
2317 108 : bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
2318 : {
2319 108 : return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
2320 : }
2321 :
2322 :
2323 : // static
2324 476080 : LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
2325 : OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
2326 : {
2327 476080 : Extraction eRet = EXTRACTED_NONE;
2328 476080 : const sal_Int32 nLen = rBcp47.getLength();
2329 476080 : const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
2330 476080 : sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
2331 476080 : sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
2332 476080 : sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
2333 476080 : if (nLen == 1 && rBcp47[0] == '*') // * the dreaded jolly joker
2334 : {
2335 : // It's f*d up but we need to recognize this.
2336 12 : eRet = EXTRACTED_X_JOKER;
2337 : }
2338 476068 : else if (nHyph1 == 1 && rBcp47[0] == 'x') // x-... privateuse
2339 : {
2340 : // x-... privateuse tags MUST be known to us by definition.
2341 26 : eRet = EXTRACTED_X;
2342 : }
2343 476042 : else if (nLen == 2 || nLen == 3) // ll or lll
2344 : {
2345 85514 : if (nHyph1 < 0)
2346 : {
2347 42757 : rLanguage = rBcp47.toAsciiLowerCase();
2348 42757 : rScript = rCountry = rVariants = OUString();
2349 42757 : eRet = EXTRACTED_LSC;
2350 : }
2351 : }
2352 433285 : else if ( (nHyph1 == 2 && nLen == 5) // ll-CC
2353 169895 : || (nHyph1 == 3 && nLen == 6)) // lll-CC
2354 : {
2355 755768 : if (nHyph2 < 0)
2356 : {
2357 377884 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2358 377884 : rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2359 377884 : rScript = rVariants = OUString();
2360 377884 : eRet = EXTRACTED_LSC;
2361 : }
2362 : }
2363 55401 : else if ( (nHyph1 == 2 && nLen == 7) // ll-Ssss or ll-vvvv
2364 38237 : || (nHyph1 == 3 && nLen == 8)) // lll-Ssss or lll-vvvv
2365 : {
2366 20604 : if (nHyph2 < 0)
2367 : {
2368 20604 : sal_Unicode c = rBcp47[nHyph1+1];
2369 20604 : if ('0' <= c && c <= '9')
2370 : {
2371 : // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
2372 2 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2373 2 : rScript = rCountry = OUString();
2374 2 : rVariants = rBcp47.copy( nHyph1 + 1);
2375 2 : eRet = EXTRACTED_LV;
2376 : }
2377 : else
2378 : {
2379 20602 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2380 41204 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
2381 61806 : rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2382 20602 : rCountry = rVariants = OUString();
2383 20602 : eRet = EXTRACTED_LSC;
2384 : }
2385 20604 : }
2386 : }
2387 34797 : else if ( (nHyph1 == 2 && nHyph2 == 7 && nLen == 10) // ll-Ssss-CC
2388 11905 : || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11)) // lll-Ssss-CC
2389 : {
2390 65830 : if (nHyph3 < 0)
2391 : {
2392 32915 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2393 32915 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2394 32915 : rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2395 32915 : rVariants = OUString();
2396 32915 : eRet = EXTRACTED_LSC;
2397 : }
2398 : }
2399 1882 : else if ( (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15) // ll-Ssss-CC-vvvv[vvvv][-...]
2400 1882 : || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16)) // lll-Ssss-CC-vvvv[vvvv][-...]
2401 : {
2402 0 : if (nHyph4 < 0)
2403 0 : nHyph4 = rBcp47.getLength();
2404 0 : if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
2405 : {
2406 0 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2407 0 : rScript = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
2408 0 : rCountry = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
2409 0 : rVariants = rBcp47.copy( nHyph3 + 1);
2410 0 : eRet = EXTRACTED_LV;
2411 : }
2412 : }
2413 1882 : else if ( (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10) // ll-CC-vvvv[vvvv][-...]
2414 878 : || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11)) // lll-CC-vvvv[vvvv][-...]
2415 : {
2416 1004 : if (nHyph3 < 0)
2417 1004 : nHyph3 = rBcp47.getLength();
2418 2008 : if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
2419 : {
2420 1004 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2421 1004 : rScript = OUString();
2422 1004 : rCountry = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
2423 1004 : rVariants = rBcp47.copy( nHyph2 + 1);
2424 1004 : eRet = EXTRACTED_LV;
2425 : }
2426 : }
2427 878 : else if ( (nHyph1 == 2 && nLen >= 8) // ll-vvvvv[vvv][-...]
2428 8 : || (nHyph1 == 3 && nLen >= 9)) // lll-vvvvv[vvv][-...]
2429 : {
2430 870 : if (nHyph2 < 0)
2431 10 : nHyph2 = rBcp47.getLength();
2432 870 : if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
2433 : {
2434 4 : rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
2435 4 : rScript = rCountry = OUString();
2436 4 : rVariants = rBcp47.copy( nHyph1 + 1);
2437 4 : eRet = EXTRACTED_LV;
2438 : }
2439 : else
2440 : {
2441 : // Known and handled grandfathered; ugly but effective ...
2442 : // Note that nLen must have matched above.
2443 : // Strictly not a variant, but so far we treat it as such.
2444 866 : if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
2445 : {
2446 860 : rLanguage = "en";
2447 860 : rScript = OUString();
2448 860 : rCountry = "GB";
2449 860 : rVariants = "oed";
2450 860 : eRet = EXTRACTED_LV;
2451 : }
2452 : }
2453 : }
2454 476080 : if (eRet == EXTRACTED_NONE)
2455 : {
2456 : SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
2457 14 : rLanguage = rScript = rCountry = rVariants = OUString();
2458 : }
2459 476080 : return eRet;
2460 : }
2461 :
2462 :
2463 : // static
2464 73896 : ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
2465 : const ::std::vector< OUString > & rList, const OUString & rReference )
2466 : {
2467 73896 : if (rList.empty())
2468 6954 : return rList.end();
2469 :
2470 66942 : ::std::vector< OUString >::const_iterator it;
2471 :
2472 : // Try the simple case first without constructing fallbacks.
2473 95448 : for (it = rList.begin(); it != rList.end(); ++it)
2474 : {
2475 66942 : if (*it == rReference)
2476 38436 : return it; // exact match
2477 : }
2478 :
2479 28506 : ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2480 28506 : if (rReference != "en-US")
2481 : {
2482 0 : aFallbacks.push_back( "en-US");
2483 0 : if (rReference != "en")
2484 0 : aFallbacks.push_back( "en");
2485 : }
2486 28506 : if (rReference != "x-default")
2487 28506 : aFallbacks.push_back( "x-default");
2488 28506 : if (rReference != "x-no-translate")
2489 28506 : aFallbacks.push_back( "x-no-translate");
2490 : /* TODO: the original comphelper::Locale::getFallback() code had
2491 : * "x-notranslate" instead of "x-no-translate", but all .xcu files use
2492 : * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
2493 : * Did that ever work? Was it supposed to work at all like this? */
2494 :
2495 111588 : for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
2496 : {
2497 167382 : for (it = rList.begin(); it != rList.end(); ++it)
2498 : {
2499 84300 : if (*it == *fb)
2500 1218 : return it; // fallback found
2501 : }
2502 : }
2503 :
2504 : // Did not find anything so return something of the list, the first value
2505 : // will do as well as any other as none did match any of the possible
2506 : // fallbacks.
2507 27288 : return rList.begin();
2508 : }
2509 :
2510 :
2511 : // static
2512 0 : ::std::vector< com::sun::star::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
2513 : const ::std::vector< com::sun::star::lang::Locale > & rList,
2514 : const com::sun::star::lang::Locale & rReference )
2515 : {
2516 0 : if (rList.empty())
2517 0 : return rList.end();
2518 :
2519 0 : ::std::vector< lang::Locale >::const_iterator it;
2520 :
2521 : // Try the simple case first without constructing fallbacks.
2522 0 : for (it = rList.begin(); it != rList.end(); ++it)
2523 : {
2524 0 : if ( (*it).Language == rReference.Language &&
2525 0 : (*it).Country == rReference.Country &&
2526 0 : (*it).Variant == rReference.Variant)
2527 0 : return it; // exact match
2528 : }
2529 :
2530 : // Now for each reference fallback test the fallbacks of the list in order.
2531 0 : ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
2532 0 : ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
2533 0 : size_t i = 0;
2534 0 : for (it = rList.begin(); it != rList.end(); ++it, ++i)
2535 : {
2536 0 : ::std::vector< OUString > aTmp( LanguageTag( *it).getFallbackStrings( true));
2537 0 : aListFallbacks[i] = aTmp;
2538 0 : }
2539 0 : for (::std::vector< OUString >::const_iterator rfb( aFallbacks.begin()); rfb != aFallbacks.end(); ++rfb)
2540 : {
2541 0 : for (::std::vector< ::std::vector< OUString > >::const_iterator lfb( aListFallbacks.begin());
2542 0 : lfb != aListFallbacks.end(); ++lfb)
2543 : {
2544 0 : for (::std::vector< OUString >::const_iterator fb( (*lfb).begin()); fb != (*lfb).end(); ++fb)
2545 : {
2546 0 : if (*rfb == *fb)
2547 0 : return rList.begin() + (lfb - aListFallbacks.begin());
2548 : }
2549 : }
2550 : }
2551 :
2552 : // No match found.
2553 0 : return rList.end();
2554 : }
2555 :
2556 :
2557 408 : static bool lcl_isSystem( LanguageType nLangID )
2558 : {
2559 408 : if (nLangID == LANGUAGE_SYSTEM)
2560 342 : return true;
2561 : // There are some special values that simplify to SYSTEM,
2562 : // getRealLanguage() catches and resolves them.
2563 66 : LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
2564 66 : if (nNewLangID != nLangID)
2565 0 : return true;
2566 66 : return false;
2567 : }
2568 :
2569 :
2570 : // static
2571 316284 : com::sun::star::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
2572 : {
2573 316284 : if (!bResolveSystem && lcl_isSystem( nLangID))
2574 342 : return lang::Locale();
2575 :
2576 315942 : return LanguageTag( nLangID).getLocale( bResolveSystem);
2577 : }
2578 :
2579 :
2580 : // static
2581 630059 : LanguageType LanguageTag::convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
2582 : {
2583 630059 : if (rLocale.Language.isEmpty() && !bResolveSystem)
2584 42522 : return LANGUAGE_SYSTEM;
2585 :
2586 587537 : return LanguageTag( rLocale).getLanguageType( bResolveSystem);
2587 : }
2588 :
2589 :
2590 : // static
2591 911942 : OUString LanguageTagImpl::convertToBcp47( const com::sun::star::lang::Locale& rLocale )
2592 : {
2593 911942 : OUString aBcp47;
2594 911942 : if (rLocale.Language.isEmpty())
2595 : {
2596 : // aBcp47 stays empty
2597 : }
2598 911942 : else if (rLocale.Language == I18NLANGTAG_QLT)
2599 : {
2600 45352 : aBcp47 = rLocale.Variant;
2601 : }
2602 : else
2603 : {
2604 : /* XXX NOTE: most legacy code never evaluated the Variant field, so for
2605 : * now just concatenate language and country. In case we stumbled over
2606 : * variant aware code we'd have to take care of that. */
2607 866590 : if (rLocale.Country.isEmpty())
2608 120646 : aBcp47 = rLocale.Language;
2609 : else
2610 : {
2611 745944 : aBcp47 = rLocale.Language + "-" + rLocale.Country;
2612 : }
2613 : }
2614 911942 : return aBcp47;
2615 : }
2616 :
2617 :
2618 : // static
2619 15564 : OUString LanguageTag::convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
2620 : {
2621 15564 : OUString aBcp47;
2622 15564 : if (rLocale.Language.isEmpty())
2623 : {
2624 30 : if (bResolveSystem)
2625 8 : aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM, true);
2626 : // else aBcp47 stays empty
2627 : }
2628 : else
2629 : {
2630 15534 : aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
2631 : }
2632 15564 : return aBcp47;
2633 : }
2634 :
2635 :
2636 : // static
2637 600 : OUString LanguageTag::convertToBcp47( LanguageType nLangID, bool bResolveSystem )
2638 : {
2639 : // Catch this first so we don't need the rest.
2640 600 : if (!bResolveSystem && lcl_isSystem( nLangID))
2641 0 : return OUString();
2642 :
2643 600 : lang::Locale aLocale( LanguageTag::convertToLocale( nLangID, bResolveSystem));
2644 : // If system for some reason (should not happen.. haha) could not be
2645 : // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
2646 : // would recurse into this method here!
2647 600 : if (aLocale.Language.isEmpty() && bResolveSystem)
2648 0 : return OUString(); // bad luck, bail out
2649 600 : return LanguageTagImpl::convertToBcp47( aLocale);
2650 : }
2651 :
2652 :
2653 : // static
2654 80824 : com::sun::star::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
2655 : {
2656 80824 : if (rBcp47.isEmpty() && !bResolveSystem)
2657 0 : return lang::Locale();
2658 :
2659 80824 : return LanguageTag( rBcp47).getLocale( bResolveSystem);
2660 : }
2661 :
2662 :
2663 : // static
2664 2968 : LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47, bool bResolveSystem )
2665 : {
2666 2968 : if (rBcp47.isEmpty() && !bResolveSystem)
2667 0 : return LANGUAGE_SYSTEM;
2668 :
2669 2968 : return LanguageTag( rBcp47).getLanguageType( bResolveSystem);
2670 : }
2671 :
2672 :
2673 : // static
2674 4571 : LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
2675 : {
2676 4571 : return LanguageTag( rBcp47).makeFallback().getLanguageType( true);
2677 : }
2678 :
2679 :
2680 : // static
2681 0 : com::sun::star::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
2682 : {
2683 0 : return LanguageTag( rBcp47).makeFallback().getLocale( true);
2684 : }
2685 :
2686 :
2687 : // static
2688 18 : bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate )
2689 : {
2690 18 : bool bValid = false;
2691 :
2692 : struct guard
2693 : {
2694 : lt_tag_t* mpLangtag;
2695 18 : guard()
2696 : {
2697 18 : theDataRef::get().incRef();
2698 18 : mpLangtag = lt_tag_new();
2699 18 : }
2700 18 : ~guard()
2701 : {
2702 18 : lt_tag_unref( mpLangtag);
2703 18 : theDataRef::get().decRef();
2704 18 : }
2705 18 : } aVar;
2706 :
2707 36 : myLtError aError;
2708 :
2709 18 : if (lt_tag_parse( aVar.mpLangtag, OUStringToOString( rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
2710 : {
2711 16 : char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
2712 : SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
2713 16 : if (pTag)
2714 : {
2715 16 : bValid = true;
2716 16 : if (bDisallowPrivate)
2717 : {
2718 6 : const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
2719 6 : if (pPrivate && lt_string_length( pPrivate) > 0)
2720 2 : bValid = false;
2721 : else
2722 : {
2723 4 : const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
2724 4 : if (pLangT)
2725 : {
2726 4 : const char* pLang = lt_lang_get_tag( pLangT);
2727 4 : if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0)
2728 : {
2729 : // Disallow 'qlt' privateuse code to prevent
2730 : // confusion with our internal usage.
2731 0 : bValid = false;
2732 : }
2733 : }
2734 : }
2735 : }
2736 16 : if (o_pCanonicalized)
2737 16 : *o_pCanonicalized = OUString::createFromAscii( pTag);
2738 16 : free( pTag);
2739 16 : return bValid;
2740 : }
2741 : }
2742 : else
2743 : {
2744 : SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
2745 : }
2746 20 : return bValid;
2747 : }
2748 :
2749 8039 : LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
2750 : {
2751 : //map the simple ones via LanguageTypes, and the hard ones explictly
2752 8039 : LanguageType nLang(LANGUAGE_DONTKNOW);
2753 :
2754 8039 : switch (nLanguage)
2755 : {
2756 : case APPLE_LANG_ENGLISH:
2757 8039 : nLang = LANGUAGE_ENGLISH;
2758 8039 : break;
2759 : case APPLE_LANG_FRENCH:
2760 0 : nLang = LANGUAGE_FRENCH;
2761 0 : break;
2762 : case APPLE_LANG_GERMAN:
2763 0 : nLang = LANGUAGE_GERMAN;
2764 0 : break;
2765 : case APPLE_LANG_ITALIAN:
2766 0 : nLang = LANGUAGE_ITALIAN;
2767 0 : break;
2768 : case APPLE_LANG_DUTCH:
2769 0 : nLang = LANGUAGE_DUTCH;
2770 0 : break;
2771 : case APPLE_LANG_SWEDISH:
2772 0 : nLang = LANGUAGE_SWEDISH;
2773 0 : break;
2774 : case APPLE_LANG_SPANISH:
2775 0 : nLang = LANGUAGE_SPANISH;
2776 0 : break;
2777 : case APPLE_LANG_DANISH:
2778 0 : nLang = LANGUAGE_DANISH;
2779 0 : break;
2780 : case APPLE_LANG_PORTUGUESE:
2781 0 : nLang = LANGUAGE_PORTUGUESE;
2782 0 : break;
2783 : case APPLE_LANG_NORWEGIAN:
2784 0 : nLang = LANGUAGE_NORWEGIAN;
2785 0 : break;
2786 : case APPLE_LANG_HEBREW:
2787 0 : nLang = LANGUAGE_HEBREW;
2788 0 : break;
2789 : case APPLE_LANG_JAPANESE:
2790 0 : nLang = LANGUAGE_JAPANESE;
2791 0 : break;
2792 : case APPLE_LANG_ARABIC:
2793 0 : nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
2794 0 : break;
2795 : case APPLE_LANG_FINNISH:
2796 0 : nLang = LANGUAGE_FINNISH;
2797 0 : break;
2798 : case APPLE_LANG_GREEK:
2799 0 : nLang = LANGUAGE_GREEK;
2800 0 : break;
2801 : case APPLE_LANG_ICELANDIC:
2802 0 : nLang = LANGUAGE_ICELANDIC;
2803 0 : break;
2804 : case APPLE_LANG_MALTESE:
2805 0 : nLang = LANGUAGE_MALTESE;
2806 0 : break;
2807 : case APPLE_LANG_TURKISH:
2808 0 : nLang = LANGUAGE_TURKISH;
2809 0 : break;
2810 : case APPLE_LANG_CROATIAN:
2811 0 : nLang = LANGUAGE_CROATIAN;
2812 0 : break;
2813 : case APPLE_LANG_CHINESE_TRADITIONAL:
2814 0 : nLang = LANGUAGE_CHINESE_TRADITIONAL;
2815 0 : break;
2816 : case APPLE_LANG_URDU:
2817 0 : nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
2818 0 : break;
2819 : case APPLE_LANG_HINDI:
2820 0 : nLang = LANGUAGE_HINDI;
2821 0 : break;
2822 : case APPLE_LANG_THAI:
2823 0 : nLang = LANGUAGE_THAI;
2824 0 : break;
2825 : case APPLE_LANG_KOREAN:
2826 0 : nLang = LANGUAGE_KOREAN;
2827 0 : break;
2828 : case APPLE_LANG_LITHUANIAN:
2829 0 : nLang = LANGUAGE_LITHUANIAN;
2830 0 : break;
2831 : case APPLE_LANG_POLISH:
2832 0 : nLang = LANGUAGE_POLISH;
2833 0 : break;
2834 : case APPLE_LANG_HUNGARIAN:
2835 0 : nLang = LANGUAGE_HUNGARIAN;
2836 0 : break;
2837 : case APPLE_LANG_ESTONIAN:
2838 0 : nLang = LANGUAGE_ESTONIAN;
2839 0 : break;
2840 : case APPLE_LANG_LATVIAN:
2841 0 : nLang = LANGUAGE_LATVIAN;
2842 0 : break;
2843 : case APPLE_LANG_SAMI:
2844 0 : nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
2845 0 : break;
2846 : case APPLE_LANG_FAROESE:
2847 0 : nLang = LANGUAGE_FAEROESE;
2848 0 : break;
2849 : case APPLE_LANG_FARSI:
2850 0 : nLang = LANGUAGE_FARSI;
2851 0 : break;
2852 : case APPLE_LANG_RUSSIAN:
2853 0 : nLang = LANGUAGE_RUSSIAN;
2854 0 : break;
2855 : case APPLE_LANG_CHINESE_SIMPLIFIED:
2856 0 : nLang = LANGUAGE_CHINESE_SIMPLIFIED;
2857 0 : break;
2858 : case APPLE_LANG_FLEMISH:
2859 0 : nLang = LANGUAGE_DUTCH_BELGIAN;
2860 0 : break;
2861 : case APPLE_LANG_IRISH_GAELIC:
2862 0 : nLang = LANGUAGE_GAELIC_IRELAND;
2863 0 : break;
2864 : case APPLE_LANG_ALBANIAN:
2865 0 : nLang = LANGUAGE_ALBANIAN;
2866 0 : break;
2867 : case APPLE_LANG_ROMANIAN:
2868 0 : nLang = LANGUAGE_ROMANIAN;
2869 0 : break;
2870 : case APPLE_LANG_CZECH:
2871 0 : nLang = LANGUAGE_CZECH;
2872 0 : break;
2873 : case APPLE_LANG_SLOVAK:
2874 0 : nLang = LANGUAGE_SLOVAK;
2875 0 : break;
2876 : case APPLE_LANG_SLOVENIAN:
2877 0 : nLang = LANGUAGE_SLOVENIAN;
2878 0 : break;
2879 : case APPLE_LANG_YIDDISH:
2880 0 : nLang = LANGUAGE_YIDDISH;
2881 0 : break;
2882 : case APPLE_LANG_SERBIAN:
2883 0 : nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA; //maybe
2884 0 : break;
2885 : case APPLE_LANG_MACEDONIAN:
2886 0 : nLang = LANGUAGE_MACEDONIAN;
2887 0 : break;
2888 : case APPLE_LANG_BULGARIAN:
2889 0 : nLang = LANGUAGE_BULGARIAN;
2890 0 : break;
2891 : case APPLE_LANG_UKRAINIAN:
2892 0 : nLang = LANGUAGE_UKRAINIAN;
2893 0 : break;
2894 : case APPLE_LANG_BYELORUSSIAN:
2895 0 : nLang = LANGUAGE_BELARUSIAN;
2896 0 : break;
2897 : case APPLE_LANG_UZBEK:
2898 0 : nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
2899 0 : break;
2900 : case APPLE_LANG_KAZAKH:
2901 0 : nLang = LANGUAGE_KAZAKH;
2902 0 : break;
2903 : case APPLE_LANG_AZERI_CYRILLIC:
2904 0 : nLang = LANGUAGE_AZERI_CYRILLIC;
2905 0 : break;
2906 : case APPLE_LANG_AZERI_ARABIC:
2907 0 : return LanguageTag("az-Arab");
2908 : break;
2909 : case APPLE_LANG_ARMENIAN:
2910 0 : nLang = LANGUAGE_ARMENIAN;
2911 0 : break;
2912 : case APPLE_LANG_GEORGIAN:
2913 0 : nLang = LANGUAGE_GEORGIAN;
2914 0 : break;
2915 : case APPLE_LANG_MOLDAVIAN:
2916 0 : nLang = LANGUAGE_ROMANIAN_MOLDOVA;
2917 0 : break;
2918 : case APPLE_LANG_KIRGHIZ:
2919 0 : nLang = LANGUAGE_KIRGHIZ;
2920 0 : break;
2921 : case APPLE_LANG_TAJIKI:
2922 0 : nLang = LANGUAGE_TAJIK;
2923 0 : break;
2924 : case APPLE_LANG_TURKMEN:
2925 0 : nLang = LANGUAGE_TURKMEN;
2926 0 : break;
2927 : case APPLE_LANG_MONGOLIAN_MONGOLIAN:
2928 0 : nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
2929 0 : break;
2930 : case APPLE_LANG_MONGOLIAN_CYRILLIC:
2931 0 : nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
2932 0 : break;
2933 : case APPLE_LANG_PASHTO:
2934 0 : nLang = LANGUAGE_PASHTO;
2935 0 : break;
2936 : case APPLE_LANG_KURDISH:
2937 0 : nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
2938 0 : break;
2939 : case APPLE_LANG_KASHMIRI:
2940 0 : nLang = LANGUAGE_KASHMIRI;
2941 0 : break;
2942 : case APPLE_LANG_SINDHI:
2943 0 : nLang = LANGUAGE_SINDHI;
2944 0 : break;
2945 : case APPLE_LANG_TIBETAN:
2946 0 : nLang = LANGUAGE_TIBETAN;
2947 0 : break;
2948 : case APPLE_LANG_NEPALI:
2949 0 : nLang = LANGUAGE_NEPALI;
2950 0 : break;
2951 : case APPLE_LANG_SANSKRIT:
2952 0 : nLang = LANGUAGE_SANSKRIT;
2953 0 : break;
2954 : case APPLE_LANG_MARATHI:
2955 0 : nLang = LANGUAGE_MARATHI;
2956 0 : break;
2957 : case APPLE_LANG_BENGALI:
2958 0 : nLang = LANGUAGE_BENGALI;
2959 0 : break;
2960 : case APPLE_LANG_ASSAMESE:
2961 0 : nLang = LANGUAGE_ASSAMESE;
2962 0 : break;
2963 : case APPLE_LANG_GUJARATI:
2964 0 : nLang = LANGUAGE_GUJARATI;
2965 0 : break;
2966 : case APPLE_LANG_PUNJABI:
2967 0 : nLang = LANGUAGE_PUNJABI;
2968 0 : break;
2969 : case APPLE_LANG_ORIYA:
2970 0 : nLang = LANGUAGE_ODIA;
2971 0 : break;
2972 : case APPLE_LANG_MALAYALAM:
2973 0 : nLang = LANGUAGE_MALAYALAM;
2974 0 : break;
2975 : case APPLE_LANG_KANNADA:
2976 0 : nLang = LANGUAGE_KANNADA;
2977 0 : break;
2978 : case APPLE_LANG_TAMIL:
2979 0 : nLang = LANGUAGE_TAMIL;
2980 0 : break;
2981 : case APPLE_LANG_TELUGU:
2982 0 : nLang = LANGUAGE_TELUGU;
2983 0 : break;
2984 : case APPLE_LANG_SINHALESE:
2985 0 : nLang = LANGUAGE_SINHALESE_SRI_LANKA;
2986 0 : break;
2987 : case APPLE_LANG_BURMESE:
2988 0 : nLang = LANGUAGE_BURMESE;
2989 0 : break;
2990 : case APPLE_LANG_KHMER:
2991 0 : nLang = LANGUAGE_KHMER;
2992 0 : break;
2993 : case APPLE_LANG_LAO:
2994 0 : nLang = LANGUAGE_LAO;
2995 0 : break;
2996 : case APPLE_LANG_VIETNAMESE:
2997 0 : nLang = LANGUAGE_VIETNAMESE;
2998 0 : break;
2999 : case APPLE_LANG_INDONESIAN:
3000 0 : nLang = LANGUAGE_INDONESIAN;
3001 0 : break;
3002 : case APPLE_LANG_TAGALONG:
3003 0 : nLang = LANGUAGE_USER_TAGALOG;
3004 0 : break;
3005 : case APPLE_LANG_MALAY_LATIN:
3006 0 : nLang = LANGUAGE_MALAY_MALAYSIA;
3007 0 : break;
3008 : case APPLE_LANG_MALAY_ARABIC:
3009 0 : return LanguageTag("ms-Arab");
3010 : break;
3011 : case APPLE_LANG_AMHARIC:
3012 0 : nLang = LANGUAGE_AMHARIC_ETHIOPIA;
3013 0 : break;
3014 : case APPLE_LANG_TIGRINYA:
3015 0 : nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
3016 0 : break;
3017 : case APPLE_LANG_GALLA:
3018 0 : nLang = LANGUAGE_OROMO;
3019 0 : break;
3020 : case APPLE_LANG_SOMALI:
3021 0 : nLang = LANGUAGE_SOMALI;
3022 0 : break;
3023 : case APPLE_LANG_SWAHILI:
3024 0 : nLang = LANGUAGE_SWAHILI;
3025 0 : break;
3026 : case APPLE_LANG_KINYARWANDA:
3027 0 : nLang = LANGUAGE_KINYARWANDA_RWANDA;
3028 0 : break;
3029 : case APPLE_LANG_RUNDI:
3030 0 : return LanguageTag("rn");
3031 : break;
3032 : case APPLE_LANG_NYANJA:
3033 0 : nLang = LANGUAGE_USER_NYANJA;
3034 0 : break;
3035 : case APPLE_LANG_MALAGASY:
3036 0 : nLang = LANGUAGE_MALAGASY_PLATEAU;
3037 0 : break;
3038 : case APPLE_LANG_ESPERANTO:
3039 0 : nLang = LANGUAGE_USER_ESPERANTO;
3040 0 : break;
3041 : case APPLE_LANG_WELSH:
3042 0 : nLang = LANGUAGE_WELSH;
3043 0 : break;
3044 : case APPLE_LANG_BASQUE:
3045 0 : nLang = LANGUAGE_BASQUE;
3046 0 : break;
3047 : case APPLE_LANG_CATALAN:
3048 0 : nLang = LANGUAGE_CATALAN;
3049 0 : break;
3050 : case APPLE_LANG_LATIN:
3051 0 : nLang = LANGUAGE_USER_LATIN;
3052 0 : break;
3053 : case APPLE_LANG_QUENCHUA:
3054 0 : nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
3055 0 : break;
3056 : case APPLE_LANG_GUARANI:
3057 0 : nLang = LANGUAGE_GUARANI_PARAGUAY;
3058 0 : break;
3059 : case APPLE_LANG_AYMARA:
3060 0 : return LanguageTag("ay");
3061 : break;
3062 : case APPLE_LANG_TATAR:
3063 0 : nLang = LANGUAGE_TATAR;
3064 0 : break;
3065 : case APPLE_LANG_UIGHUR:
3066 0 : nLang = LANGUAGE_UIGHUR_CHINA;
3067 0 : break;
3068 : case APPLE_LANG_DZONGKHA:
3069 0 : nLang = LANGUAGE_DZONGKHA;
3070 0 : break;
3071 : case APPLE_LANG_JAVANESE_LATIN:
3072 0 : return LanguageTag("jv-Latn");
3073 : break;
3074 : case APPLE_LANG_SUNDANESE_LATIN:
3075 0 : return LanguageTag("su-Latn");
3076 : break;
3077 : case APPLE_LANG_GALICIAN:
3078 0 : nLang = LANGUAGE_GALICIAN;
3079 0 : break;
3080 : case APPLE_LANG_AFRIKAANS:
3081 0 : nLang = LANGUAGE_AFRIKAANS;
3082 0 : break;
3083 : case APPLE_LANG_BRETON:
3084 0 : nLang = LANGUAGE_BRETON_FRANCE;
3085 0 : break;
3086 : case APPLE_LANG_INUKTITUT:
3087 0 : nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
3088 0 : break;
3089 : case APPLE_LANG_SCOTTISH_GAELIC:
3090 0 : nLang = LANGUAGE_GAELIC_SCOTLAND;
3091 0 : break;
3092 : case APPLE_LANG_MANX_GAELIC:
3093 0 : nLang = LANGUAGE_USER_MANX;
3094 0 : break;
3095 : case APPLE_LANG_IRISH_GAELIC_WITH_DOT_ABOVE:
3096 0 : return LanguageTag("ga-Latg");
3097 : break;
3098 : case APPLE_LANG_TONGAN:
3099 0 : return LanguageTag("to");
3100 : break;
3101 : case APPLE_LANG_GREEK_POLYTONIC:
3102 0 : nLang = LANGUAGE_USER_ANCIENT_GREEK;
3103 0 : break;
3104 : case APPLE_LANG_GREENLANDIC:
3105 0 : nLang = LANGUAGE_KALAALLISUT_GREENLAND;
3106 0 : break;
3107 : case APPLE_LANG_AZERI_LATIN:
3108 0 : nLang = LANGUAGE_AZERI_LATIN;
3109 0 : break;
3110 : }
3111 :
3112 8039 : return LanguageTag(nLang);
3113 : }
3114 :
3115 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|