LCOV - code coverage report
Current view: top level - i18nlangtag/source/languagetag - languagetag.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 949 1410 67.3 %
Date: 2015-06-13 12:38:46 Functions: 97 108 89.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <config_folders.h>
      11             : #include <config_liblangtag.h>
      12             : 
      13             : #include "i18nlangtag/languagetag.hxx"
      14             : #include "i18nlangtag/applelangid.hxx"
      15             : #include "i18nlangtag/mslangid.hxx"
      16             : #include <rtl/ustrbuf.hxx>
      17             : #include <rtl/bootstrap.hxx>
      18             : #include <osl/file.hxx>
      19             : #include <osl/mutex.hxx>
      20             : #include <rtl/instance.hxx>
      21             : #include <rtl/locale.h>
      22             : #include <map>
      23             : #include <unordered_set>
      24             : 
      25             : //#define erDEBUG
      26             : 
      27             : #if defined(ENABLE_LIBLANGTAG)
      28             : #if LIBLANGTAG_INLINE_FIX
      29             : #define LT_HAVE_INLINE
      30             : #endif
      31             : #include <liblangtag/langtag.h>
      32             : #else
      33             : /* Replacement code for LGPL phobic and Android systems.
      34             :  * For iOS we could probably use NSLocale instead, that should have more or
      35             :  * less required functionality. If it is good enough, it could be used for Mac
      36             :  * OS X, too.
      37             :  */
      38             : #include "simple-langtag.cxx"
      39             : #endif
      40             : 
      41             : using namespace com::sun::star;
      42             : 
      43             : 
      44             : // Helper to ensure lt_error_t is free'd
      45             : struct myLtError
      46             : {
      47             :     lt_error_t* p;
      48         120 :     myLtError() : p(NULL) {}
      49         120 :     ~myLtError() { if (p) lt_error_unref( p); }
      50             : };
      51             : 
      52             : // "statics" to be returned as const reference to an empty locale and string.
      53             : namespace {
      54             : struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
      55             : struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
      56             : }
      57             : 
      58             : typedef std::unordered_set< OUString, OUStringHash > KnownTagSet;
      59             : namespace {
      60             : struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
      61             : struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
      62             : }
      63             : 
      64        5227 : static const KnownTagSet & getKnowns()
      65             : {
      66        5227 :     KnownTagSet & rKnowns = theKnowns::get();
      67        5227 :     if (rKnowns.empty())
      68             :     {
      69         227 :         osl::MutexGuard aGuard( theMutex::get());
      70         227 :         if (rKnowns.empty())
      71             :         {
      72         227 :             ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
      73      378636 :             for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
      74      252424 :                     it != aDefined.end(); ++it)
      75             :             {
      76             :                 // Do not use the BCP47 string here to initialize the
      77             :                 // LanguageTag because then canonicalize() would call this
      78             :                 // getKnowns() again..
      79      125985 :                 ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings( true));
      80      416091 :                 for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
      81             :                 {
      82      290106 :                     rKnowns.insert( *fb);
      83             :                 }
      84      126212 :             }
      85         227 :         }
      86             :     }
      87        5227 :     return rKnowns;
      88             : }
      89             : 
      90             : 
      91             : namespace {
      92             : struct compareIgnoreAsciiCaseLess
      93             : {
      94  8562244273 :     bool operator()( const OUString& r1, const OUString& r2 ) const
      95             :     {
      96  8562244273 :         return r1.compareToIgnoreAsciiCase( r2) < 0;
      97             :     }
      98             : };
      99             : typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
     100             : typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
     101             : struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
     102             : struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
     103             : struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
     104             : struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
     105             : }
     106             : 
     107             : 
     108          19 : static LanguageType getNextOnTheFlyLanguage()
     109             : {
     110             :     static LanguageType nOnTheFlyLanguage = 0;
     111          19 :     osl::MutexGuard aGuard( theMutex::get());
     112          19 :     if (!nOnTheFlyLanguage)
     113          13 :         nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
     114             :     else
     115             :     {
     116           6 :         if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
     117           6 :             ++nOnTheFlyLanguage;
     118             :         else
     119             :         {
     120           0 :             LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
     121           0 :             if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
     122           0 :                 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
     123             :             else
     124             :             {
     125             :                 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
     126             :                         << ((LANGUAGE_ON_THE_FLY_END - LANGUAGE_ON_THE_FLY_START + 1)
     127             :                             * (LANGUAGE_ON_THE_FLY_SUB_END - LANGUAGE_ON_THE_FLY_SUB_START + 1))
     128             :                         << " consumed?!?)");
     129           0 :                 return 0;
     130             :             }
     131             :         }
     132             :     }
     133             : #if OSL_DEBUG_LEVEL > 0
     134             :     static size_t nOnTheFlies = 0;
     135             :     ++nOnTheFlies;
     136             :     SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
     137             : #endif
     138          19 :     return nOnTheFlyLanguage;
     139             : }
     140             : 
     141             : 
     142             : // static
     143      203577 : bool LanguageTag::isOnTheFlyID( LanguageType nLang )
     144             : {
     145      203577 :     LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
     146      203577 :     LanguageType nSub = MsLangId::getSubLanguage( nLang);
     147             :     return
     148         862 :         LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
     149      204439 :         LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
     150             : }
     151             : 
     152             : 
     153             : /** A reference holder for liblangtag data de/initialization, one static
     154             :     instance. Currently implemented such that the first "ref" inits and dtor
     155             :     (our library deinitialized) tears down.
     156             : */
     157             : class LiblangtagDataRef
     158             : {
     159             : public:
     160             :     LiblangtagDataRef();
     161             :     ~LiblangtagDataRef();
     162          75 :     inline void init()
     163             :     {
     164          75 :         if (!mbInitialized)
     165          56 :             setup();
     166          75 :     }
     167             : private:
     168             :     OString maDataPath;   // path to liblangtag data, "|" if system
     169             :     bool mbInitialized;
     170             : 
     171             :     void setupDataPath();
     172             :     void setup();
     173             :     static void teardown();
     174             : };
     175             : 
     176             : namespace {
     177             : struct theDataRef : public rtl::Static< LiblangtagDataRef, theDataRef > {};
     178             : }
     179             : 
     180          56 : LiblangtagDataRef::LiblangtagDataRef()
     181             :     :
     182          56 :         mbInitialized(false)
     183             : {
     184          56 : }
     185             : 
     186         112 : LiblangtagDataRef::~LiblangtagDataRef()
     187             : {
     188          56 :     if (mbInitialized)
     189          56 :         teardown();
     190          56 : }
     191             : 
     192          56 : void LiblangtagDataRef::setup()
     193             : {
     194             :     SAL_INFO( "i18nlangtag", "LiblangtagDataRef::setup: initializing database");
     195          56 :     if (maDataPath.isEmpty())
     196          56 :         setupDataPath();
     197          56 :     lt_db_initialize();
     198          56 :     mbInitialized = true;
     199          56 : }
     200             : 
     201          56 : void LiblangtagDataRef::teardown()
     202             : {
     203             :     SAL_INFO( "i18nlangtag", "LiblangtagDataRef::teardown: finalizing database");
     204          56 :     lt_db_finalize();
     205          56 : }
     206             : 
     207          56 : void LiblangtagDataRef::setupDataPath()
     208             : {
     209             :     // maDataPath is assumed to be empty here.
     210          56 :     OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
     211          56 :     rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
     212             : 
     213             :     // Check if data is in our own installation, else assume system
     214             :     // installation.
     215         112 :     OUString aData( aURL);
     216          56 :     aData += "/language-subtag-registry.xml";
     217         112 :     osl::DirectoryItem aDirItem;
     218          56 :     if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
     219             :     {
     220           0 :         OUString aPath;
     221           0 :         if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
     222           0 :             maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
     223             :     }
     224          56 :     if (maDataPath.isEmpty())
     225          56 :         maDataPath = "|";   // assume system
     226             :     else
     227          56 :         lt_db_set_datadir( maDataPath.getStr());
     228          56 : }
     229             : 
     230             : 
     231             : /* TODO: we could transform known vendor and browser-specific variants to known
     232             :  * BCP 47 if available. For now just remove them to not confuse any later
     233             :  * treatments that check for empty variants. This vendor stuff was never
     234             :  * supported anyway. */
     235  1390695963 : static void handleVendorVariant( com::sun::star::lang::Locale & rLocale )
     236             : {
     237  1390695963 :     if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
     238           7 :         rLocale.Variant.clear();
     239  1390695963 : }
     240             : 
     241             : 
     242             : class LanguageTagImpl
     243             : {
     244             : public:
     245             : 
     246             :     explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
     247             :     explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
     248             :     ~LanguageTagImpl();
     249             :     LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
     250             : 
     251             : private:
     252             : 
     253             :     friend class LanguageTag;
     254             : 
     255             :     enum Decision
     256             :     {
     257             :         DECISION_DONTKNOW,
     258             :         DECISION_NO,
     259             :         DECISION_YES
     260             :     };
     261             : 
     262             :     mutable com::sun::star::lang::Locale    maLocale;
     263             :     mutable OUString                        maBcp47;
     264             :     mutable OUString                        maCachedLanguage;   ///< cache getLanguage()
     265             :     mutable OUString                        maCachedScript;     ///< cache getScript()
     266             :     mutable OUString                        maCachedCountry;    ///< cache getCountry()
     267             :     mutable OUString                        maCachedVariants;   ///< cache getVariants()
     268             :     mutable lt_tag_t*                       mpImplLangtag;      ///< liblangtag pointer
     269             :     mutable LanguageType                    mnLangID;
     270             :     mutable Decision                        meIsValid;
     271             :     mutable Decision                        meIsIsoLocale;
     272             :     mutable Decision                        meIsIsoODF;
     273             :     mutable Decision                        meIsLiblangtagNeeded;   ///< whether processing with liblangtag needed
     274             :             bool                            mbSystemLocale      : 1;
     275             :     mutable bool                            mbInitializedBcp47  : 1;
     276             :     mutable bool                            mbInitializedLocale : 1;
     277             :     mutable bool                            mbInitializedLangID : 1;
     278             :     mutable bool                            mbCachedLanguage    : 1;
     279             :     mutable bool                            mbCachedScript      : 1;
     280             :     mutable bool                            mbCachedCountry     : 1;
     281             :     mutable bool                            mbCachedVariants    : 1;
     282             : 
     283             :     const OUString &    getBcp47() const;
     284             :     OUString            getLanguage() const;
     285             :     OUString            getScript() const;
     286             :     OUString            getCountry() const;
     287             :     OUString            getRegion() const;
     288             :     OUString            getVariants() const;
     289             :     bool                hasScript() const;
     290             : 
     291             :     bool                isIsoLocale() const;
     292             :     bool                isIsoODF() const;
     293             :     bool                isValidBcp47() const;
     294             : 
     295             :     void                convertLocaleToBcp47();
     296             :     void                convertLocaleToLang( bool bAllowOnTheFlyID );
     297             :     void                convertBcp47ToLocale();
     298             :     void                convertBcp47ToLang();
     299             :     void                convertLangToLocale();
     300             :     void                convertLangToBcp47();
     301             : 
     302             :     /** @return whether BCP 47 language tag string was changed. */
     303             :     bool                canonicalize();
     304             : 
     305             :     /** Canonicalize if not yet done and synchronize initialized conversions.
     306             : 
     307             :         @return whether BCP 47 language tag string was changed.
     308             :      */
     309             :     bool                synCanonicalize();
     310             : 
     311             :     OUString            getLanguageFromLangtag();
     312             :     OUString            getScriptFromLangtag();
     313             :     OUString            getRegionFromLangtag();
     314             :     OUString            getVariantsFromLangtag();
     315             : 
     316             :     /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
     317             : 
     318             :         @param  nRegisterID
     319             :                 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
     320             :                 instead of generating an on-the-fly ID. Implementation may
     321             :                 still generate an ID if the suggested ID is already used for
     322             :                 another language tag.
     323             : 
     324             :         @return NULL if no ID could be obtained or registration failed.
     325             :      */
     326             :     LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
     327             : 
     328             :     /** Obtain Language, Script, Country and Variants via simpleExtract() and
     329             :         assign them to the cached variables if successful.
     330             : 
     331             :         @return return of simpleExtract()
     332             :      */
     333             :     bool                cacheSimpleLSCV();
     334             : 
     335             :     enum Extraction
     336             :     {
     337             :         EXTRACTED_NONE,
     338             :         EXTRACTED_LSC,
     339             :         EXTRACTED_LV,
     340             :         EXTRACTED_X,
     341             :         EXTRACTED_X_JOKER
     342             :     };
     343             : 
     344             :     /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
     345             :         portions.
     346             : 
     347             :         Does not check case or content!
     348             : 
     349             :         @return EXTRACTED_LSC if simple tag was detected (i.e. one that
     350             :                 would fulfill the isIsoODF() condition),
     351             :                 EXTRACTED_LV if a tag with variant was detected,
     352             :                 EXTRACTED_X if x-... privateuse tag was detected,
     353             :                 EXTRACTED_X_JOKER if "*" joker was detected,
     354             :                 EXTRACTED_NONE else.
     355             :      */
     356             :     static Extraction   simpleExtract( const OUString& rBcp47,
     357             :                                        OUString& rLanguage,
     358             :                                        OUString& rScript,
     359             :                                        OUString& rCountry,
     360             :                                        OUString& rVariants );
     361             : 
     362             :     /** Convert Locale to BCP 47 string without resolving system and creating
     363             :         temporary LanguageTag instances. */
     364             :     static OUString     convertToBcp47( const com::sun::star::lang::Locale& rLocale );
     365             : };
     366             : 
     367             : 
     368      330302 : LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
     369             :     :
     370             :         maLocale( rLanguageTag.maLocale),
     371             :         maBcp47( rLanguageTag.maBcp47),
     372             :         mpImplLangtag( NULL),
     373             :         mnLangID( rLanguageTag.mnLangID),
     374             :         meIsValid( DECISION_DONTKNOW),
     375             :         meIsIsoLocale( DECISION_DONTKNOW),
     376             :         meIsIsoODF( DECISION_DONTKNOW),
     377             :         meIsLiblangtagNeeded( DECISION_DONTKNOW),
     378             :         mbSystemLocale( rLanguageTag.mbSystemLocale),
     379             :         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
     380             :         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
     381             :         mbInitializedLangID( rLanguageTag.mbInitializedLangID),
     382             :         mbCachedLanguage( false),
     383             :         mbCachedScript( false),
     384             :         mbCachedCountry( false),
     385      330302 :         mbCachedVariants( false)
     386             : {
     387      330302 : }
     388             : 
     389             : 
     390           0 : LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
     391             :     :
     392             :         maLocale( rLanguageTagImpl.maLocale),
     393             :         maBcp47( rLanguageTagImpl.maBcp47),
     394             :         maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
     395             :         maCachedScript( rLanguageTagImpl.maCachedScript),
     396             :         maCachedCountry( rLanguageTagImpl.maCachedCountry),
     397             :         maCachedVariants( rLanguageTagImpl.maCachedVariants),
     398             :         mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
     399           0 :                 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL),
     400             :         mnLangID( rLanguageTagImpl.mnLangID),
     401             :         meIsValid( rLanguageTagImpl.meIsValid),
     402             :         meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
     403             :         meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
     404             :         meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
     405             :         mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
     406             :         mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
     407             :         mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
     408             :         mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
     409             :         mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
     410             :         mbCachedScript( rLanguageTagImpl.mbCachedScript),
     411             :         mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
     412           0 :         mbCachedVariants( rLanguageTagImpl.mbCachedVariants)
     413             : {
     414           0 :     if (mpImplLangtag)
     415           0 :         theDataRef::get().init();
     416           0 : }
     417             : 
     418             : 
     419           0 : LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
     420             : {
     421           0 :     if (&rLanguageTagImpl == this)
     422           0 :         return *this;
     423             : 
     424           0 :     maLocale            = rLanguageTagImpl.maLocale;
     425           0 :     maBcp47             = rLanguageTagImpl.maBcp47;
     426           0 :     maCachedLanguage    = rLanguageTagImpl.maCachedLanguage;
     427           0 :     maCachedScript      = rLanguageTagImpl.maCachedScript;
     428           0 :     maCachedCountry     = rLanguageTagImpl.maCachedCountry;
     429           0 :     maCachedVariants    = rLanguageTagImpl.maCachedVariants;
     430           0 :     lt_tag_t * oldTag = mpImplLangtag;
     431             :     mpImplLangtag       = rLanguageTagImpl.mpImplLangtag ?
     432           0 :                             lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL;
     433           0 :     lt_tag_unref(oldTag);
     434           0 :     mnLangID            = rLanguageTagImpl.mnLangID;
     435           0 :     meIsValid           = rLanguageTagImpl.meIsValid;
     436           0 :     meIsIsoLocale       = rLanguageTagImpl.meIsIsoLocale;
     437           0 :     meIsIsoODF          = rLanguageTagImpl.meIsIsoODF;
     438           0 :     meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
     439           0 :     mbSystemLocale      = rLanguageTagImpl.mbSystemLocale;
     440           0 :     mbInitializedBcp47  = rLanguageTagImpl.mbInitializedBcp47;
     441           0 :     mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
     442           0 :     mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
     443           0 :     mbCachedLanguage    = rLanguageTagImpl.mbCachedLanguage;
     444           0 :     mbCachedScript      = rLanguageTagImpl.mbCachedScript;
     445           0 :     mbCachedCountry     = rLanguageTagImpl.mbCachedCountry;
     446           0 :     mbCachedVariants    = rLanguageTagImpl.mbCachedVariants;
     447           0 :     if (mpImplLangtag && !oldTag)
     448           0 :         theDataRef::get().init();
     449           0 :     return *this;
     450             : }
     451             : 
     452             : 
     453      660350 : LanguageTagImpl::~LanguageTagImpl()
     454             : {
     455      330175 :     if (mpImplLangtag)
     456             :     {
     457          66 :         lt_tag_unref( mpImplLangtag);
     458             :     }
     459      330175 : }
     460             : 
     461             : 
     462      349736 : LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
     463             :     :
     464             :         maBcp47( rBcp47LanguageTag),
     465             :         mnLangID( LANGUAGE_DONTKNOW),
     466      349736 :         mbSystemLocale( rBcp47LanguageTag.isEmpty()),
     467      349736 :         mbInitializedBcp47( !mbSystemLocale),
     468             :         mbInitializedLocale( false),
     469             :         mbInitializedLangID( false),
     470     1049208 :         mbIsFallback( false)
     471             : {
     472      349736 :     if (bCanonicalize)
     473             :     {
     474        8575 :         getImpl()->canonicalize();
     475             :         // Registration itself may already have canonicalized, so do an
     476             :         // unconditional sync.
     477        8575 :         syncFromImpl();
     478             :     }
     479             : 
     480      349736 : }
     481             : 
     482             : 
     483  1390695923 : LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
     484             :     :
     485             :         maLocale( rLocale),
     486             :         mnLangID( LANGUAGE_DONTKNOW),
     487  1390695923 :         mbSystemLocale( rLocale.Language.isEmpty()),
     488             :         mbInitializedBcp47( false),
     489  1390695923 :         mbInitializedLocale( !mbSystemLocale),
     490             :         mbInitializedLangID( false),
     491  4172087769 :         mbIsFallback( false)
     492             : {
     493  1390695923 :     handleVendorVariant( maLocale);
     494  1390695923 : }
     495             : 
     496             : 
     497  1399783858 : LanguageTag::LanguageTag( LanguageType nLanguage )
     498             :     :
     499             :         mnLangID( nLanguage),
     500  1399783858 :         mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
     501             :         mbInitializedBcp47( false),
     502             :         mbInitializedLocale( false),
     503  1399783858 :         mbInitializedLangID( !mbSystemLocale),
     504  4199351574 :         mbIsFallback( false)
     505             : {
     506  1399783858 : }
     507             : 
     508             : 
     509         890 : LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
     510             :                           const OUString& rScript, const OUString& rCountry )
     511             :     :
     512             :         maBcp47( rBcp47),
     513             :         mnLangID( LANGUAGE_DONTKNOW),
     514         890 :         mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
     515         890 :         mbInitializedBcp47( !rBcp47.isEmpty()),
     516             :         mbInitializedLocale( false),
     517             :         mbInitializedLangID( false),
     518        2670 :         mbIsFallback( false)
     519             : {
     520         890 :     if (!mbSystemLocale && !mbInitializedBcp47)
     521             :     {
     522         890 :         if (rScript.isEmpty())
     523             :         {
     524         890 :             maBcp47 = rLanguage + "-" + rCountry;
     525         890 :             mbInitializedBcp47 = true;
     526         890 :             maLocale.Language = rLanguage;
     527         890 :             maLocale.Country  = rCountry;
     528         890 :             mbInitializedLocale = true;
     529             :         }
     530             :         else
     531             :         {
     532           0 :             if (rCountry.isEmpty())
     533           0 :                 maBcp47 = rLanguage + "-" + rScript;
     534             :             else
     535           0 :                 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
     536           0 :             mbInitializedBcp47 = true;
     537           0 :             maLocale.Language = I18NLANGTAG_QLT;
     538           0 :             maLocale.Country  = rCountry;
     539           0 :             maLocale.Variant  = maBcp47;
     540           0 :             mbInitializedLocale = true;
     541             :         }
     542             :     }
     543         890 : }
     544             : 
     545             : 
     546         211 : LanguageTag::LanguageTag( const rtl_Locale & rLocale )  // Construct a tag from a C-level rtl_Locale.
     547             :     :
     548             :         maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),  // take over the three locale strings
     549             :         mnLangID( LANGUAGE_DONTKNOW),
     550         211 :         mbSystemLocale( maLocale.Language.isEmpty()),  // an empty Language marks the system locale
     551             :         mbInitializedBcp47( false),
     552         211 :         mbInitializedLocale( !mbSystemLocale),  // reads mbSystemLocale; assumes it is declared (hence initialized) before this member -- TODO confirm in header
     553             :         mbInitializedLangID( false),
     554         633 :         mbIsFallback( false)
     555             : {
     556         211 :     convertFromRtlLocale();  // NOTE(review): helper is defined outside this excerpt; presumably post-processes maLocale -- confirm
     557         211 : }
     558             : 
     559             : 
     560   150560201 : LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )  // Copy constructor: member-wise copy of every field of rLanguageTag.
     561             :     :
     562             :         maLocale( rLanguageTag.maLocale),
     563             :         maBcp47( rLanguageTag.maBcp47),
     564             :         mnLangID( rLanguageTag.mnLangID),
     565             :         mpImpl( rLanguageTag.mpImpl),  // the impl pointer is copied, no new registration happens here
     566             :         mbSystemLocale( rLanguageTag.mbSystemLocale),
     567             :         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
     568             :         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
     569             :         mbInitializedLangID( rLanguageTag.mbInitializedLangID),
     570   150560201 :         mbIsFallback(rLanguageTag.mbIsFallback)
     571             : {
     572   150560201 : }
     573             : 
     574             : 
     575   262162083 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )  // Copy assignment: member-wise copy of rLanguageTag; returns *this.
     576             : {
     577   262162083 :     if (&rLanguageTag == this)  // self-assignment: nothing to copy
     578           0 :         return *this;
     579             : 
     580   262162083 :     maLocale            = rLanguageTag.maLocale;
     581   262162083 :     maBcp47             = rLanguageTag.maBcp47;
     582   262162083 :     mnLangID            = rLanguageTag.mnLangID;
     583   262162083 :     mpImpl              = rLanguageTag.mpImpl;  // take over the impl pointer, as the copy constructor does
     584   262162083 :     mbSystemLocale      = rLanguageTag.mbSystemLocale;
     585   262162083 :     mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
     586   262162083 :     mbInitializedLocale = rLanguageTag.mbInitializedLocale;
     587   262162083 :     mbInitializedLangID = rLanguageTag.mbInitializedLangID;
                     :     mbIsFallback        = rLanguageTag.mbIsFallback;  // was missing: the copy constructor copies this flag, assignment left the old value in place
     588   262162083 :     return *this;
     589             : }
     590             : 
     591             : 
     592  2941340711 : LanguageTag::~LanguageTag()  // Destructor with an empty body: no explicit cleanup is performed here.
     593             : {
     594  2941340711 : }
     595             : 
     596             : 
     597        5028 : LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )  // Register this impl's BCP 47 string under a LangID; nRegisterID is a suggested ID, 0 or LANGUAGE_DONTKNOW requests a generated one.
     598             : {  // Returns the impl found or created for maBcp47; an empty pointer if no BCP 47 string is available.
     599        5028 :     LanguageTag::ImplPtr pImpl;
     600             : 
     601        5028 :     if (!mbInitializedBcp47)  // try to derive the BCP 47 string from the locale if only the locale is known
     602             :     {
     603           0 :         if (mbInitializedLocale)
     604             :         {
     605           0 :             maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
     606           0 :             mbInitializedBcp47 = !maBcp47.isEmpty();
     607             :         }
     608             :     }
     609        5028 :     if (maBcp47.isEmpty())
     610             :     {
     611             :         SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
     612           0 :         return pImpl;  // still empty at this point
     613             :     }
     614             : 
     615       10056 :     osl::MutexGuard aGuard( theMutex::get());  // guard stays in scope for all map accesses below
     616             : 
     617        5028 :     MapBcp47& rMapBcp47 = theMapBcp47::get();
     618        5028 :     MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
     619        5028 :     bool bOtherImpl = false;  // set when the map already holds a different impl for maBcp47
     620        5028 :     if (it != rMapBcp47.end())
     621             :     {
     622             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
     623        5028 :         pImpl = (*it).second;
     624        5028 :         if (pImpl.get() != this)
     625             :         {
     626             :             // Could happen for example if during registerImpl() the tag was
     627             :             // changed via canonicalize() and the result was already present in
     628             :             // the map before, for example 'bn-Beng' => 'bn'. This specific
     629             :             // case is now taken care of in registerImpl() and doesn't reach
     630             :             // here. However, use the already existing impl if it matches.
     631             :             SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
     632           0 :             *this = *pImpl;     // ensure consistency
     633           0 :             bOtherImpl = true;
     634             :         }
     635             :     }
     636             :     else
     637             :     {
     638             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
     639           0 :         pImpl.reset( new LanguageTagImpl( *this));  // register a copy of this impl under maBcp47
     640           0 :         rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
     641             :     }
     642             : 
     643        5028 :     if (!bOtherImpl || !pImpl->mbInitializedLangID)  // assign an ID unless a different, already ID-initialized impl was found
     644             :     {
     645        5028 :         if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
     646          17 :             nRegisterID = getNextOnTheFlyLanguage();
     647             :         else
     648             :         {
     649             :             // Accept a suggested ID only if it is not mapped yet to something
     650             :             // different, otherwise we would end up with ambiguous assignments
     651             :             // of different language tags, for example for the same primary
     652             :             // LangID with "no", "nb" and "nn".
     653        5011 :             const MapLangID& rMapLangID = theMapLangID::get();
     654        5011 :             MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
     655        5011 :             if (itID != rMapLangID.end())
     656             :             {
     657           2 :                 if ((*itID).second->maBcp47 != maBcp47)
     658             :                 {
     659             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
     660             :                             << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
     661             :                             << (*itID).second->maBcp47 << "'");
     662           2 :                     nRegisterID = getNextOnTheFlyLanguage();
     663             :                 }
     664             :                 else
     665             :                 {
     666             :                     SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
     667             :                             << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
     668             :                 }
     669             :             }
     670             :         }
     671        5028 :         if (!nRegisterID)
     672             :         {
     673             :             // out of IDs, nothing to register
     674           0 :             return pImpl;
     675             :         }
     676        5028 :         pImpl->mnLangID = nRegisterID;
     677        5028 :         pImpl->mbInitializedLangID = true;
     678        5028 :         if (pImpl.get() != this)  // keep this instance in step with the registered impl
     679             :         {
     680           0 :             mnLangID = nRegisterID;
     681           0 :             mbInitializedLangID = true;
     682             :         }
     683             :     }
     684             : 
     685             :     ::std::pair< MapLangID::const_iterator, bool > res(
     686        5028 :             theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));  // cross-insert into the LangID map; the result is only logged
     687        5028 :     if (res.second)
     688             :     {
     689             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
     690             :                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
     691             :     }
     692             :     else
     693             :     {
     694             :         SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
     695             :                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
     696             :                 << (*res.first).second->maBcp47 << "'");
     697             :     }
     698             : 
     699        5028 :     return pImpl;
     700             : }
     701             : 
     702             : // static
     703         242 : void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )  // Set the configured system language to nLang and re-register the system locale impl.
     704             : {
     705         242 :     if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)  // unresolved values are rejected with a warning
     706             :     {
     707             :         SAL_WARN( "i18nlangtag",
     708             :                 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
     709             :                 ::std::hex << nLang);
     710           0 :         return;
     711             :     }
     712             :     SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
     713         242 :     MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
     714             :     // Reset system locale to none and let registerImpl() do the rest to
     715             :     // initialize a new one.
     716         242 :     theSystemLocale::get().reset();
     717         242 :     LanguageTag aLanguageTag( LANGUAGE_SYSTEM);  // temporary tag used only to trigger the registration
     718         242 :     aLanguageTag.registerImpl();
     719             : }
     720             : 
     721      203618 : static bool lcl_isKnownOnTheFlyID( LanguageType nLang )  // True if nLang is neither DONTKNOW nor SYSTEM and is an on-the-fly ID or equals its own primary language ID.
     722             : {
     723      413887 :     return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
     724      610741 :         (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
     725             : }
     726             : 
     727             : 
     728  2701127648 : LanguageTag::ImplPtr LanguageTag::registerImpl() const  // Look up, or create and register, the impl matching this tag's LangID or BCP 47 string and return it.
     729             : {
     730             :     // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
     731             :     // here as they access getImpl() and syncFromImpl() and would lead to
     732             :     // recursion. Also do not use the static LanguageTag::convertTo...()
     733             :     // methods as they may create temporary LanguageTag instances. Only
     734             :     // LanguageTagImpl::convertToBcp47(Locale) is ok.
     735             : 
     736  2701127648 :     ImplPtr pImpl;
     737             : 
     738             : #if OSL_DEBUG_LEVEL > 0
     739             :     static size_t nCalls = 0;
     740             :     ++nCalls;
     741             :     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
     742             : #endif
     743             : 
     744             :     // Do not register unresolved system locale, also force LangID if system
     745             :     // and take the system locale shortcut if possible.
     746  2701127648 :     if (mbSystemLocale)
     747             :     {
     748      299288 :         pImpl = theSystemLocale::get();
     749      299288 :         if (pImpl)
     750             :         {
     751             : #if OSL_DEBUG_LEVEL > 0
     752             :             static size_t nCallsSystem = 0;
     753             :             ++nCallsSystem;
     754             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
     755             : #endif
     756      299033 :             return pImpl;
     757             :         }
     758         255 :         if (!mbInitializedLangID)
     759             :         {
     760         255 :             mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
     761         255 :             mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);  // only counts as initialized if the system ID could be resolved
     762             :             SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
     763             :         }
     764             :     }
     765             : 
     766  2700828615 :     if (mbInitializedLangID)
     767             :     {
     768  1400908906 :         if (mnLangID == LANGUAGE_DONTKNOW)
     769             :         {
     770             :             // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
     771             :             // conversion attempts. At the same time provide a central breakpoint
     772             :             // to inspect such places.
     773     1039173 :             LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
     774     1039173 :             if (!rDontKnow)
     775         200 :                 rDontKnow.reset( new LanguageTagImpl( *this));  // created on first use, then reused
     776     1039173 :             pImpl = rDontKnow;
     777             : #if OSL_DEBUG_LEVEL > 0
     778             :             static size_t nCallsDontKnow = 0;
     779             :             ++nCallsDontKnow;
     780             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
     781             : #endif
     782     1039173 :             return pImpl;
     783             :         }
     784             :         else
     785             :         {
     786             :             // A great share are calls for a system equal locale.
     787  1399869733 :             pImpl = theSystemLocale::get();
     788  1399869733 :             if (pImpl && pImpl->mnLangID == mnLangID)
     789             :             {
     790             : #if OSL_DEBUG_LEVEL > 0
     791             :                 static size_t nCallsSystemEqual = 0;
     792             :                 ++nCallsSystemEqual;
     793             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
     794             :                         << " system equal LangID calls");
     795             : #endif
     796   621811624 :                 return pImpl;
     797             :             }
     798             :         }
     799             :     }
     800             : 
     801             :     // Force Bcp47 if not LangID.
     802  2077977818 :     if (!mbInitializedLangID && !mbInitializedBcp47 && mbInitializedLocale)
     803             :     {
     804  1299581443 :         maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
     805  1299581443 :         mbInitializedBcp47 = !maBcp47.isEmpty();
     806             :     }
     807             : 
     808  2077977818 :     if (mbInitializedBcp47)
     809             :     {
     810             :         // A great share are calls for a system equal locale.
     811  1299923114 :         pImpl = theSystemLocale::get();
     812  1299923114 :         if (pImpl && pImpl->maBcp47 == maBcp47)
     813             :         {
     814             : #if OSL_DEBUG_LEVEL > 0
     815             :             static size_t nCallsSystemEqual = 0;
     816             :             ++nCallsSystemEqual;
     817             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
     818             : #endif
     819   521813622 :             return pImpl;
     820             :         }
     821             :     }
     822             : 
     823             : #if OSL_DEBUG_LEVEL > 0
     824             :     static size_t nCallsNonSystem = 0;
     825             :     ++nCallsNonSystem;
     826             :     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
     827             : #endif
     828             : 
     829  3112328392 :     osl::MutexGuard aGuard( theMutex::get());  // none of the shortcuts above applied; the map work below runs with this guard in scope
     830             : 
     831             : #if OSL_DEBUG_LEVEL > 0
     832             :     static long nRunning = 0;
     833             :     // Entering twice here is ok, which is needed for fallback init in
     834             :     // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
     835             :     // everything else is suspicious.
     836             :     SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
     837             :             << maBcp47 << "' 0x" << ::std::hex << mnLangID );
     838             :     struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
     839             : #endif
     840             : 
     841             :     // Prefer LangID map as find+insert needs less comparison work.
     842  1556164196 :     if (mbInitializedLangID)
     843             :     {
     844   778058109 :         MapLangID& rMap = theMapLangID::get();
     845   778058109 :         MapLangID::const_iterator it( rMap.find( mnLangID));
     846   778058109 :         if (it != rMap.end())
     847             :         {
     848             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
     849   777947936 :             pImpl = (*it).second;
     850             :         }
     851             :         else
     852             :         {
     853             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
     854      110173 :             pImpl.reset( new LanguageTagImpl( *this));
     855      110173 :             rMap.insert( ::std::make_pair( mnLangID, pImpl));
     856             :             // Try round-trip.
     857      110173 :             if (!pImpl->mbInitializedLocale)
     858      109946 :                 pImpl->convertLangToLocale();
     859      110173 :             LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
     860             :             // If round-trip is identical cross-insert to Bcp47 map.
     861      110173 :             if (nLang == pImpl->mnLangID)
     862             :             {
     863      103806 :                 if (!pImpl->mbInitializedBcp47)
     864      103579 :                     pImpl->convertLocaleToBcp47();
     865             :                 ::std::pair< MapBcp47::const_iterator, bool > res(
     866      103806 :                         theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
     867      103806 :                 if (res.second)
     868             :                 {
     869             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
     870             :                 }
     871             :                 else
     872             :                 {
     873             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
     874             :                             << ::std::hex << (*res.first).second->mnLangID);
     875             :                 }
     876             :             }
     877             :             else
     878             :             {
     879        6367 :                 if (!pImpl->mbInitializedBcp47)
     880        6367 :                     pImpl->convertLocaleToBcp47();  // string is still computed for the impl, it is just not put into the Bcp47 map
     881             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
     882             :             }
     883             :         }
     884             :     }
     885   778106087 :     else if (!maBcp47.isEmpty())
     886             :     {
     887   778106087 :         MapBcp47& rMap = theMapBcp47::get();
     888   778106087 :         MapBcp47::const_iterator it( rMap.find( maBcp47));
     889   778106087 :         if (it != rMap.end())
     890             :         {
     891             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
     892   777886158 :             pImpl = (*it).second;
     893             :         }
     894             :         else
     895             :         {
     896             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
     897      219929 :             pImpl.reset( new LanguageTagImpl( *this));
     898      219929 :             ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));  // entry under the tag string as given
     899             :             // If changed after canonicalize() also add the resulting tag to
     900             :             // the map.
     901      219929 :             if (pImpl->synCanonicalize())
     902             :             {
     903             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
     904             :                 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
     905       21726 :                         rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
     906             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
     907             :                         << "inserted '" << pImpl->maBcp47 << "'");
     908             :                 // If the canonicalized tag already existed (was not inserted)
     909             :                 // and impls are different, make this impl that impl and skip
     910             :                 // the rest if that LangID is present as well. The existing
     911             :                 // entry may or may not be different, it may even be strictly
     912             :                 // identical to this if it differs only in case (e.g. ko-kr =>
     913             :                 // ko-KR) which was corrected in canonicalize() hence also in
     914             :                 // the map entry but comparison is case insensitive and found
     915             :                 // it again.
     916       21726 :                 if (!insCanon.second && (*insCanon.first).second != pImpl)
     917             :                 {
     918       16304 :                     (*insOrig.first).second = pImpl = (*insCanon.first).second;  // repoint the original entry and pImpl to the existing impl
     919             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
     920             :                             << ::std::hex << pImpl->mnLangID);
     921             :                 }
     922             :             }
     923      219929 :             if (!pImpl->mbInitializedLangID)
     924             :             {
     925             :                 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
     926      203618 :                 if (!pImpl->mbInitializedLocale)
     927      203615 :                     pImpl->convertBcp47ToLocale();
     928      203618 :                 if (!pImpl->mbInitializedLangID)
     929      203618 :                     pImpl->convertLocaleToLang( true);
     930             :                 // Unconditionally insert (round-trip is possible) for
     931             :                 // on-the-fly IDs and (generated or not) suggested IDs.
     932      203618 :                 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
     933      203618 :                 OUString aBcp47;  // round-trip result; stays empty when no round-trip was attempted
     934      203618 :                 if (!bInsert)
     935             :                 {
     936      196920 :                     if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
     937             :                     {
     938             :                         // May have involved canonicalize(), so compare with
     939             :                         // pImpl->maBcp47 instead of maBcp47!
     940      393746 :                         aBcp47 = LanguageTagImpl::convertToBcp47(
     941      393746 :                                 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
     942      196873 :                         bInsert = (aBcp47 == pImpl->maBcp47);
     943             :                     }
     944             :                 }
     945             :                 // If round-trip is identical cross-insert to Bcp47 map.
     946      203618 :                 if (bInsert)
     947             :                 {
     948             :                     ::std::pair< MapLangID::const_iterator, bool > res(
     949      191480 :                             theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));  // note: the insertion here goes into the LangID map
     950      191480 :                     if (res.second)
     951             :                     {
     952             :                         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
     953             :                                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
     954             :                     }
     955             :                     else
     956             :                     {
     957             :                         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
     958             :                                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
     959             :                                 << (*res.first).second->maBcp47 << "'");
     960             :                     }
     961             :                 }
     962             :                 else
     963             :                 {
     964             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
     965             :                             << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
     966             :                             << aBcp47 << "'");
     967      203618 :                 }
     968             :             }
     969             :         }
     970             :     }
     971             :     else
     972             :     {
     973             :         SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
     974           0 :         pImpl.reset( new LanguageTagImpl( *this));  // impl is created but not inserted into either map
     975             :     }
     976             : 
     977             :     // If we reach here for mbSystemLocale we didn't have theSystemLocale
     978             :     // above, so add it.
     979  1556164196 :     if (mbSystemLocale && mbInitializedLangID)
     980             :     {
     981         255 :         theSystemLocale::get() = pImpl;
     982             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
     983             :                 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
     984             :     }
     985             : 
     986  1556164196 :     return pImpl;
     987             : }
     988             : 
     989             : 
     990  2703609803 : LanguageTag::ImplPtr LanguageTag::getImpl() const  // Return the impl for this tag, obtaining it via registerImpl() on first use.
     991             : {
     992  2703609803 :     if (!mpImpl)  // no impl cached yet
     993             :     {
     994  2701124001 :         mpImpl = registerImpl();
     995  2701124001 :         syncVarsFromRawImpl();  // NOTE(review): defined outside this excerpt; presumably copies the impl's values into this tag's members -- confirm
     996             :     }
     997  2703609803 :     return mpImpl;
     998             : }
     999             : 
    1000             : 
    1001     4021897 : void LanguageTag::resetVars()  // Drop the impl and put every member back to the unresolved system-locale state.
    1002             : {
    1003     4021897 :     mpImpl.reset();
    1004     4021897 :     maLocale            = lang::Locale();
    1005     4021897 :     maBcp47.clear();
    1006     4021897 :     mnLangID            = LANGUAGE_SYSTEM;
    1007     4021897 :     mbSystemLocale      = true;
    1008     4021897 :     mbInitializedBcp47  = false;
    1009     4021897 :     mbInitializedLocale = false;
    1010     4021897 :     mbInitializedLangID = false;
    1011     4021897 :     mbIsFallback        = false;
    1012     4021897 : }
    1013             : 
    1014             : 
    1015         568 : LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )  // Re-initialize this tag from a BCP 47 string, optionally canonicalizing it; returns *this.
    1016             : {
    1017         568 :     resetVars();
    1018         568 :     maBcp47             = rBcp47LanguageTag;
    1019         568 :     mbSystemLocale      = rBcp47LanguageTag.isEmpty();  // an empty string denotes the system locale
    1020         568 :     mbInitializedBcp47  = !mbSystemLocale;
    1021             : 
    1022         568 :     if (bCanonicalize)
    1023             :     {
    1024           0 :         getImpl()->canonicalize();  // getImpl() registers the impl first because resetVars() cleared mpImpl
    1025             :         // Registration itself may already have canonicalized, so do an
    1026             :         // unconditional sync.
    1027           0 :         syncFromImpl();
    1028             :     }
    1029         568 :     return *this;
    1030             : }
    1031             : 
    1032             : 
    1033          40 : LanguageTag & LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )  // Re-initialize this tag from a Locale; returns *this.
    1034             : {
    1035          40 :     resetVars();
    1036          40 :     maLocale            = rLocale;
    1037          40 :     mbSystemLocale      = rLocale.Language.isEmpty();  // an empty Language denotes the system locale
    1038          40 :     mbInitializedLocale = !mbSystemLocale;
    1039          40 :     handleVendorVariant( maLocale);  // NOTE(review): helper defined outside this excerpt; it receives maLocale and may adjust it -- confirm
    1040          40 :     return *this;
    1041             : }
    1042             : 
    1043             : 
    1044     4021289 : LanguageTag & LanguageTag::reset( LanguageType nLanguage )  // Re-initialize this tag from a LangID; returns *this.
    1045             : {
    1046     4021289 :     resetVars();
    1047     4021289 :     mnLangID            = nLanguage;
    1048     4021289 :     mbSystemLocale      = nLanguage == LANGUAGE_SYSTEM;  // LANGUAGE_SYSTEM denotes the system locale
    1049     4021289 :     mbInitializedLangID = !mbSystemLocale;
    1050     4021289 :     return *this;
    1051             : }
    1052             : 
    1053             : 
    1054      338819 : bool LanguageTagImpl::canonicalize()
    1055             : {
                           // Canonicalize maBcp47. First tries to decide, without liblangtag,
                           // whether the tag is a known locale or known fallback
                           // (meIsLiblangtagNeeded becomes DECISION_NO); only otherwise is the
                           // tag parsed and canonicalized with liblangtag. Sets meIsValid and
                           // returns true if maBcp47 was changed.
    1056             : #ifdef erDEBUG
    1057             :     // dump once
    1058             :     struct dumper
    1059             :     {
    1060             :         lt_tag_t** mpp;
    1061             :         dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
    1062             :         ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
    1063             :     };
    1064             :     dumper aDumper( &mpImplLangtag);
    1065             : #endif
    1066             : 
    1067      338819 :     bool bChanged = false;
    1068             : 
    1069             :     // Side effect: have maBcp47 in any case, resolved system.
    1070             :     // Some methods calling canonicalize() (or not calling it due to
    1071             :     // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
    1072             :     // meIsLiblangtagNeeded anywhere else than hereafter.
    1073      338819 :     getBcp47();
    1074             : 
    1075             :     // The simple cases and known locales don't need liblangtag processing,
    1076             :     // which also avoids loading liblangtag data on startup.
    1077      338819 :     if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
    1078             :     {
                               // These flags record that maLocale / mnLangID were filled in
                               // only for the checks below; both are reverted at the end of
                               // this branch.
    1079      330244 :         bool bTemporaryLocale = false;
    1080      330244 :         bool bTemporaryLangID = false;
    1081      330244 :         if (!mbInitializedLocale && !mbInitializedLangID)
    1082             :         {
    1083      219919 :             if (mbSystemLocale)
    1084             :             {
    1085           0 :                 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1086           0 :                 mbInitializedLangID = true;
    1087             :             }
    1088             :             else
    1089             :             {
    1090             :                 // Now this is getting funny.. we only have some BCP47 string
    1091             :                 // and want to determine if parsing it would be possible
    1092             :                 // without using liblangtag just to see if it is a simple known
    1093             :                 // locale or could fall back to one.
    1094      439838 :                 OUString aLanguage, aScript, aCountry, aVariants;
    1095      219919 :                 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
    1096      219919 :                 if (eExt != EXTRACTED_NONE)
    1097             :                 {
    1098      219869 :                     if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
    1099             :                     {
    1100             :                         // Rebuild bcp47 with proper casing of tags.
    1101      219854 :                         OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
    1102      219854 :                                 1 + aCountry.getLength() + 1 + aVariants.getLength());
    1103      219854 :                         aBuf.append( aLanguage);
    1104      219854 :                         if (!aScript.isEmpty())
    1105       28368 :                             aBuf.append("-" + aScript);
    1106      219854 :                         if (!aCountry.isEmpty())
    1107      191472 :                             aBuf.append("-" + aCountry);
    1108      219854 :                         if (!aVariants.isEmpty())
    1109         837 :                             aBuf.append("-" + aVariants);
    1110      439708 :                         OUString aStr( aBuf.makeStringAndClear());
    1111             : 
    1112      219854 :                         if (maBcp47 != aStr)
    1113             :                         {
    1114           0 :                             maBcp47 = aStr;
    1115           0 :                             bChanged = true;
    1116      219854 :                         }
    1117             :                     }
                                           // Language + country without script goes into the plain
                                           // Locale fields; everything else is stored under the
                                           // I18NLANGTAG_QLT language with the full tag in Variant.
    1118      219869 :                     if (eExt == EXTRACTED_LSC && aScript.isEmpty())
    1119             :                     {
    1120      190649 :                         maLocale.Language = aLanguage;
    1121      190649 :                         maLocale.Country  = aCountry;
    1122             :                     }
    1123             :                     else
    1124             :                     {
    1125       29220 :                         maLocale.Language = I18NLANGTAG_QLT;
    1126       29220 :                         maLocale.Country  = aCountry;
    1127       29220 :                         maLocale.Variant  = maBcp47;
    1128             :                     }
    1129      219869 :                     bTemporaryLocale = mbInitializedLocale = true;
    1130      219919 :                 }
    1131             :             }
    1132             :         }
    1133      330244 :         if (mbInitializedLangID && !mbInitializedLocale)
    1134             :         {
    1135             :             // Do not call getLocale() here because that prefers
    1136             :             // convertBcp47ToLocale() which would end up in recursion via
    1137             :             // isIsoLocale()!
    1138             : 
    1139             :             // Prepare to verify that we have a known locale, not just an
    1140             :             // arbitrary MS-LangID.
    1141           0 :             convertLangToLocale();
    1142             :         }
    1143      330244 :         if (mbInitializedLocale)
    1144             :         {
    1145      330194 :             if (!mbInitializedLangID)
    1146             :             {
                                       // false: no on-the-fly ID is generated during this lookup.
    1147      219879 :                 convertLocaleToLang( false);
    1148      219879 :                 if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
    1149      219872 :                     bTemporaryLangID = true;
    1150             :             }
    1151      330194 :             if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
    1152      324967 :                 meIsLiblangtagNeeded = DECISION_NO; // known locale
    1153             :             else
    1154             :             {
    1155        5227 :                 const KnownTagSet& rKnowns = getKnowns();
    1156        5227 :                 if (rKnowns.find( maBcp47) != rKnowns.end())
    1157        5211 :                     meIsLiblangtagNeeded = DECISION_NO; // known fallback
    1158             :             }
    1159             :             // We may have an internal override "canonicalization".
    1160      330194 :             lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
    1161      680502 :             if (!aNew.Language.isEmpty() &&
    1162      625647 :                     (aNew.Language != maLocale.Language ||
    1163      600518 :                      aNew.Country  != maLocale.Country ||
    1164      300050 :                      aNew.Variant  != maLocale.Variant))
    1165             :             {
                                       // The tag string changed, so the cached ISO decisions are
                                       // reset to "don't know".
    1166       25129 :                 maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
    1167       25129 :                 bChanged = true;
    1168       25129 :                 meIsIsoLocale = DECISION_DONTKNOW;
    1169       25129 :                 meIsIsoODF = DECISION_DONTKNOW;
    1170       25129 :                 meIsLiblangtagNeeded = DECISION_NO; // known locale
    1171      330194 :             }
    1172             :         }
                               // Revert the state that was only filled in temporarily above.
    1173      330244 :         if (bTemporaryLocale)
    1174             :         {
    1175      219869 :             mbInitializedLocale = false;
    1176      219869 :             maLocale = lang::Locale();
    1177             :         }
    1178      330244 :         if (bTemporaryLangID)
    1179             :         {
    1180      219872 :             mbInitializedLangID = false;
    1181      219872 :             mnLangID = LANGUAGE_DONTKNOW;
    1182             :         }
    1183             :     }
    1184      338819 :     if (meIsLiblangtagNeeded == DECISION_NO)
    1185             :     {
    1186      338708 :         meIsValid = DECISION_YES;   // really, known must be valid ...
    1187      338708 :         return bChanged;            // that's it
    1188             :     }
    1189             : 
    1190         111 :     meIsLiblangtagNeeded = DECISION_YES;
    1191             :     SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
    1192             : 
                           // Create the lt_tag_t only on first use; theDataRef is initialized
                           // right before that.
    1193         111 :     if (!mpImplLangtag)
    1194             :     {
    1195          66 :         theDataRef::get().init();
    1196          66 :         mpImplLangtag = lt_tag_new();
    1197             :     }
    1198             : 
    1199         111 :     myLtError aError;
    1200             : 
    1201         111 :     if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    1202             :     {
    1203          19 :         char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
    1204             :         SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
    1205          19 :         if (pTag)
    1206             :         {
    1207          19 :             OUString aNew( OUString::createFromAscii( pTag));
    1208             :             // Make the lt_tag_t follow the new string if different, which
    1209             :             // removes default script and such.
    1210          19 :             if (maBcp47 != aNew)
    1211             :             {
    1212           2 :                 maBcp47 = aNew;
    1213           2 :                 bChanged = true;
    1214           2 :                 meIsIsoLocale = DECISION_DONTKNOW;
    1215           2 :                 meIsIsoODF = DECISION_DONTKNOW;
    1216           2 :                 if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
    1217             :                 {
    1218             :                     SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
    1219           0 :                     free( pTag);
    1220           0 :                     meIsValid = DECISION_NO;
    1221           0 :                     return bChanged;
    1222             :                 }
    1223             :             }
                                   // pTag came from lt_tag_canonicalize() and is released with
                                   // free() on every path through this branch.
    1224          19 :             free( pTag);
    1225          19 :             meIsValid = DECISION_YES;
    1226          19 :             return bChanged;
    1227             :         }
    1228             :     }
    1229             :     else
    1230             :     {
    1231             :         SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
    1232             :     }
                           // Reached when parsing failed or lt_tag_canonicalize() returned no
                           // string.
    1233          92 :     meIsValid = DECISION_NO;
    1234          92 :     return bChanged;
    1235             : }
    1236             : 
    1237             : 
    1238     1485332 : bool LanguageTagImpl::synCanonicalize()
    1239             : {
                           // Runs canonicalize() unless liblangtag was already decided to be
                           // unnecessary or an lt_tag_t already exists. If the BCP 47 string
                           // changed, the locale and LangID are re-derived from it, but only
                           // those that were already initialized. Returns whether it changed.
    1240     1485332 :     bool bChanged = false;
    1241     1485332 :     if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
    1242             :     {
    1243      330244 :         bChanged = canonicalize();
    1244      330244 :         if (bChanged)
    1245             :         {
    1246       25131 :             if (mbInitializedLocale)
    1247        3405 :                 convertBcp47ToLocale();
    1248       25131 :             if (mbInitializedLangID)
    1249        3405 :                 convertBcp47ToLang();
    1250             :         }
    1251             :     }
    1252     1485332 :     return bChanged;
    1253             : }
    1254             : 
    1255             : 
    1256      751833 : void LanguageTag::syncFromImpl()
    1257             : {
                           // Copies the impl's state into this object. If this object's
                           // initialized BCP 47 string or LangID differs from the impl's, the
                           // tag is re-registered afterwards and mpImpl is replaced by the
                           // result of registerImpl().
    1258      751833 :     ImplPtr xImpl = getImpl();
    1259      751833 :     LanguageTagImpl* pImpl = xImpl.get();
                           // Must be evaluated before syncVarsFromRawImpl() overwrites the
                           // members being compared.
    1260      755238 :     bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
    1261     1500261 :             (mbInitializedLangID && mnLangID != pImpl->mnLangID));
    1262             :     SAL_INFO_IF( bRegister, "i18nlangtag",
    1263             :             "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
    1264             :             "' and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
    1265      751833 :     syncVarsFromRawImpl();
    1266      751833 :     if (bRegister)
    1267        3405 :         mpImpl = registerImpl();
    1268      751833 : }
    1269             : 
    1270             : 
    1271  2699210178 : void LanguageTag::syncVarsFromImpl() const
    1272             : {
                           // Refreshes this object's cached members from the impl: when no impl
                           // is attached yet, getImpl() is called; otherwise the members are
                           // copied directly.
    1273  2699210178 :     if (!mpImpl)
    1274  2699210178 :         getImpl();      // with side effect syncVarsFromRawImpl()
    1275             :     else
    1276           0 :         syncVarsFromRawImpl();
    1277  2699210178 : }
    1278             : 
    1279             : 
    1280  2701875834 : void LanguageTag::syncVarsFromRawImpl() const
    1281             : {
                           // Copies the BCP 47 string, locale and LangID, each with its
                           // "initialized" flag, from the attached impl. Does nothing if no
                           // impl is attached.
    1282             :     // Do not use getImpl() here.
    1283  2701875834 :     LanguageTagImpl* pImpl = mpImpl.get();
    1284  2701875834 :     if (!pImpl)
    1285  2701875834 :         return;
    1286             : 
    1287             :     // Obviously only mutable variables.
    1288  2701875834 :     mbInitializedBcp47  = pImpl->mbInitializedBcp47;
    1289  2701875834 :     maBcp47             = pImpl->maBcp47;
    1290  2701875834 :     mbInitializedLocale = pImpl->mbInitializedLocale;
    1291  2701875834 :     maLocale            = pImpl->maLocale;
    1292  2701875834 :     mbInitializedLangID = pImpl->mbInitializedLangID;
    1293  2701875834 :     mnLangID            = pImpl->mnLangID;
    1294             : }
    1295             : 
    1296             : 
    1297           0 : bool LanguageTag::synCanonicalize()
    1298             : {
                           // Delegates to the impl's synCanonicalize() and, only if that
                           // reported a change, pulls the new state back via syncFromImpl().
                           // Returns whether the tag changed.
    1299           0 :     bool bChanged = getImpl()->synCanonicalize();
    1300           0 :     if (bChanged)
    1301           0 :         syncFromImpl();
    1302           0 :     return bChanged;
    1303             : }
    1304             : 
    1305             : 
    1306      110146 : void LanguageTagImpl::convertLocaleToBcp47()
    1307             : {
                           // Derives maBcp47 from maLocale and marks it initialized. An empty
                           // locale is first resolved to the system locale; if it is still
                           // empty afterwards, maBcp47 is cleared.
    1308      110146 :     if (mbSystemLocale && !mbInitializedLocale)
    1309           0 :         convertLangToLocale();
    1310             : 
    1311      110146 :     if (maLocale.Language.isEmpty())
    1312             :     {
    1313             :         // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
    1314             :         // locale via LanguageTag::convertToBcp47(LanguageType) and
    1315             :         // LanguageTag::convertToLocale(LanguageType) would instantiate another
    1316             :         // LanguageTag.
    1317           3 :         maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, true);
    1318             :     }
    1319      110146 :     if (maLocale.Language.isEmpty())
    1320             :     {
    1321           0 :         maBcp47.clear();   // bad luck
    1322             :     }
    1323      110146 :     else if (maLocale.Language == I18NLANGTAG_QLT)
    1324             :     {
                               // With the I18NLANGTAG_QLT language the Variant field is taken
                               // over as the BCP 47 string, and the tag is flagged as not
                               // being an ISO locale.
    1325       16168 :         maBcp47 = maLocale.Variant;
    1326       16168 :         meIsIsoLocale = DECISION_NO;
    1327             :     }
    1328             :     else
    1329             :     {
    1330       93978 :         maBcp47 = LanguageTag::convertToBcp47( maLocale, true);
    1331             :     }
    1332      110146 :     mbInitializedBcp47 = true;
    1333      110146 : }
    1334             : 
    1335             : 
    1336      426902 : void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
    1337             : {
                           // Derives mnLangID from maLocale (or from the system language when
                           // mbSystemLocale is set) and marks it initialized. If the locale is
                           // unknown and bAllowOnTheFlyID is true, registerOnTheFly() is called
                           // for a valid BCP 47 tag; an invalid tag only produces a warning.
    1338      426902 :     if (mbSystemLocale)
    1339             :     {
    1340           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1341             :     }
    1342             :     else
    1343             :     {
    1344      426902 :         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
    1345      426902 :         if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
    1346             :         {
    1347        5075 :             if (isValidBcp47())
    1348             :             {
    1349             :                 // For language-only (including script) look if we know some
    1350             :                 // locale of that language and if so try to use the primary
    1351             :                 // language ID of that instead of generating an on-the-fly ID.
    1352        5028 :                 if (getCountry().isEmpty() && isIsoODF())
    1353             :                 {
    1354        5013 :                     lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
    1355             :                     // 'en-US' is last resort, do not use except when looking
    1356             :                     // for 'en'.
    1357        5013 :                     if (aLoc.Language != "en" || getLanguage() == "en")
    1358             :                     {
    1359        5011 :                         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
    1360        5011 :                         if (mnLangID != LANGUAGE_DONTKNOW)
    1361        5011 :                             mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
    1362        5013 :                     }
    1363             :                 }
                                       // mnLangID is either still LANGUAGE_DONTKNOW or the primary
                                       // language ID found above.
    1364        5028 :                 registerOnTheFly( mnLangID);
    1365             :             }
    1366             :             else
    1367             :             {
    1368             :                 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
    1369             :                         << maBcp47 << "'");
    1370             :             }
    1371             :         }
    1372             :     }
    1373      426902 :     mbInitializedLangID = true;
    1374      426902 : }
    1375             : 
    1376             : 
    1377           0 : void LanguageTag::convertLocaleToLang()
    1378             : {
                           // Lets the impl convert locale to LangID with on-the-fly IDs
                           // allowed, then copies the impl's state back into this object.
    1379           0 :     getImpl()->convertLocaleToLang( true);
    1380           0 :     syncFromImpl();
    1381           0 : }
    1382             : 
    1383             : 
    1384      207020 : void LanguageTagImpl::convertBcp47ToLocale()
    1385             : {
                           // Fills maLocale from the BCP 47 tag and marks it initialized.
                           // An ISO locale gets language and region with an empty Variant;
                           // anything else is stored under the I18NLANGTAG_QLT language with
                           // the full tag string in Variant.
    1386      207020 :     bool bIso = isIsoLocale();
    1387      207020 :     if (bIso)
    1388             :     {
    1389      182948 :         maLocale.Language = getLanguageFromLangtag();
    1390      182948 :         maLocale.Country = getRegionFromLangtag();
    1391      182948 :         maLocale.Variant.clear();
    1392             :     }
    1393             :     else
    1394             :     {
    1395       24072 :         maLocale.Language = I18NLANGTAG_QLT;
    1396       24072 :         maLocale.Country = getCountry();
    1397       24072 :         maLocale.Variant = maBcp47;
    1398             :     }
    1399      207020 :     mbInitializedLocale = true;
    1400      207020 : }
    1401             : 
    1402             : 
    1403           0 : void LanguageTag::convertBcp47ToLocale()
    1404             : {
                           // Lets the impl derive the locale from the BCP 47 tag, then copies
                           // the impl's state back into this object.
    1405           0 :     getImpl()->convertBcp47ToLocale();
    1406           0 :     syncFromImpl();
    1407           0 : }
    1408             : 
    1409             : 
    1410        3405 : void LanguageTagImpl::convertBcp47ToLang()
    1411             : {
                           // Derives mnLangID from the BCP 47 tag and marks it initialized.
                           // The conversion goes through maLocale, which is built first if
                           // needed; a system locale resolves directly to the system language.
    1412        3405 :     if (mbSystemLocale)
    1413             :     {
    1414           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1415             :     }
    1416             :     else
    1417             :     {
    1418        3405 :         if (!mbInitializedLocale)
    1419           0 :             convertBcp47ToLocale();
    1420        3405 :         convertLocaleToLang( true);
    1421             :     }
    1422        3405 :     mbInitializedLangID = true;
    1423        3405 : }
    1424             : 
    1425             : 
    1426           0 : void LanguageTag::convertBcp47ToLang()
    1427             : {
                           // Lets the impl derive the LangID from the BCP 47 tag, then copies
                           // the impl's state back into this object.
    1428           0 :     getImpl()->convertBcp47ToLang();
    1429           0 :     syncFromImpl();
    1430           0 : }
    1431             : 
    1432             : 
    1433      110146 : void LanguageTagImpl::convertLangToLocale()
    1434             : {
                           // Fills maLocale from mnLangID and marks it initialized. For a
                           // system locale whose LangID is not yet set, the system language
                           // is resolved first.
    1435      110146 :     if (mbSystemLocale && !mbInitializedLangID)
    1436             :     {
    1437           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1438           0 :         mbInitializedLangID = true;
    1439             :     }
    1440             :     // Resolve system here! The original is remembered as mbSystemLocale.
    1441      110146 :     maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
    1442      110146 :     mbInitializedLocale = true;
    1443      110146 : }
    1444             : 
    1445             : 
    1446           0 : void LanguageTag::convertLangToLocale()
    1447             : {
                           // Lets the impl derive the locale from the LangID, then copies the
                           // impl's state back into this object.
    1448           0 :     getImpl()->convertLangToLocale();
    1449           0 :     syncFromImpl();
    1450           0 : }
    1451             : 
    1452             : 
    1453         200 : void LanguageTagImpl::convertLangToBcp47()
    1454             : {
                           // Derives maBcp47 from mnLangID in two steps: LangID to locale
                           // (skipped if the locale is already initialized), then locale to
                           // BCP 47.
    1455         200 :     if (!mbInitializedLocale)
    1456         200 :         convertLangToLocale();
    1457         200 :     convertLocaleToBcp47();
                           // convertLocaleToBcp47() has already set this flag as well.
    1458         200 :     mbInitializedBcp47 = true;
    1459         200 : }
    1460             : 
    1461             : 
    1462         211 : void LanguageTag::convertFromRtlLocale()
    1463             : {
                           // When maLocale carries a non-empty Variant, the locale is rebuilt
                           // as a "language_country" + variant byte string, mapped to a
                           // LangID, and maLocale is then cleared so that only the LangID
                           // stays initialized. Does nothing for an empty Variant.
    1464             :     // The rtl_Locale follows the Open Group Base Specification,
    1465             :     // 8.2 Internationalization Variables
    1466             :     // language[_territory][.codeset][@modifier]
    1467             :     // On GNU/Linux systems usually being glibc locales.
    1468             :     // sal/osl/unx/nlsupport.c _parse_locale() parses them into
    1469             :     // Language: language               2 or 3 alpha code
    1470             :     // Country: [territory]             2 alpha code
    1471             :     // Variant: [.codeset][@modifier]
    1472             :     // Variant effectively contains anything that follows the territory, not
    1473             :     // looking for '.' dot delimiter or '@' modifier content.
    1474         211 :     if (!maLocale.Variant.isEmpty())
    1475             :     {
    1476         422 :         OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
    1477         211 :                 RTL_TEXTENCODING_UTF8);
    1478             :         /* FIXME: let liblangtag parse this entirely with
    1479             :          * lt_tag_convert_from_locale() but that needs a patch to pass the
    1480             :          * string. */
    1481             : #if 0
    1482             :         myLtError aError;
    1483             :         theDataRef::get().init();
    1484             :         mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
    1485             :         maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
    1486             :         mbInitializedBcp47 = true;
    1487             : #else
    1488         211 :         mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
    1489         211 :         if (mnLangID == LANGUAGE_DONTKNOW)
    1490             :         {
    1491             :             SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
    1492           0 :             mnLangID = LANGUAGE_ENGLISH_US;     // we need _something_ here
    1493             :         }
    1494         211 :         mbInitializedLangID = true;
    1495             : #endif
                               // The locale is dropped here; only the LangID set above remains
                               // initialized.
    1496         211 :         maLocale = lang::Locale();
    1497         211 :         mbInitializedLocale = false;
    1498             :     }
    1499         211 : }
    1500             : 
    1501             : 
    1502      339005 : const OUString & LanguageTagImpl::getBcp47() const
    1503             : {
                           // Returns maBcp47, building it on first use from the locale if that
                           // is initialized, otherwise from the LangID. The const_cast is there
                           // because the conversion methods are non-const.
    1504      339005 :     if (!mbInitializedBcp47)
    1505             :     {
    1506         200 :         if (mbInitializedLocale)
    1507           0 :             const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
    1508             :         else
    1509         200 :             const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
    1510             :     }
    1511      339005 :     return maBcp47;
    1512             : }
    1513             : 
    1514             : 
    1515   171049269 : const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
    1516             : {
                           // Returns the BCP 47 string. For a system locale with
                           // bResolveSystem false, the shared empty string is returned.
                           // Otherwise the cached value is refreshed from the impl, and only
                           // if it is still uninitialized is the impl asked to build it.
    1517   171049269 :     if (!bResolveSystem && mbSystemLocale)
    1518        7827 :         return theEmptyBcp47::get();
    1519   171041442 :     if (!mbInitializedBcp47)
    1520   170962487 :         syncVarsFromImpl();
    1521   171041442 :     if (!mbInitializedBcp47)
    1522             :     {
    1523         186 :         getImpl()->getBcp47();
    1524         186 :         const_cast<LanguageTag*>(this)->syncFromImpl();
    1525             :     }
    1526   171041442 :     return maBcp47;
    1527             : }
    1528             : 
    1529             : 
    1530      476007 : OUString LanguageTagImpl::getLanguageFromLangtag()
    1531             : {
                           // Returns the language subtag after synCanonicalize(). With an
                           // lt_tag_t present it is read from liblangtag, otherwise from the
                           // cached simple extraction. Returns an empty string if maBcp47 is
                           // empty or no language is available.
    1532      476007 :     OUString aLanguage;
    1533      476007 :     synCanonicalize();
    1534      476007 :     if (maBcp47.isEmpty())
    1535           0 :         return aLanguage;
    1536      476007 :     if (mpImplLangtag)
    1537             :     {
    1538          21 :         const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
    1539             :         SAL_WARN_IF( !pLangT, "i18nlangtag",
    1540             :                 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
    1541          21 :         if (!pLangT)
    1542          12 :             return aLanguage;
    1543           9 :         const char* pLang = lt_lang_get_tag( pLangT);
    1544             :         SAL_WARN_IF( !pLang, "i18nlangtag",
    1545             :                 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
    1546           9 :         if (pLang)
    1547           9 :             aLanguage = OUString::createFromAscii( pLang);
    1548             :     }
    1549             :     else
    1550             :     {
                               // cacheSimpleLSCV() is only called when the language is not
                               // cached yet.
    1551      475986 :         if (mbCachedLanguage || cacheSimpleLSCV())
    1552      475984 :             aLanguage = maCachedLanguage;
    1553             :     }
    1554      475995 :     return aLanguage;
    1555             : }
    1556             : 
    1557             : 
    1558          18 : OUString LanguageTagImpl::getScriptFromLangtag()
    1559             : {
                           // Returns the script subtag after synCanonicalize(). With an
                           // lt_tag_t present it is read from liblangtag, otherwise from the
                           // cached simple extraction. Returns an empty string if maBcp47 is
                           // empty or no script is available.
    1560          18 :     OUString aScript;
    1561          18 :     synCanonicalize();
    1562          18 :     if (maBcp47.isEmpty())
    1563           0 :         return aScript;
    1564          18 :     if (mpImplLangtag)
    1565             :     {
    1566          18 :         const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
    1567             :         // pScriptT==NULL is valid for default scripts
    1568          18 :         if (!pScriptT)
    1569          17 :             return aScript;
    1570           1 :         const char* pScript = lt_script_get_tag( pScriptT);
    1571             :         SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
    1572           1 :         if (pScript)
    1573           1 :             aScript = OUString::createFromAscii( pScript);
    1574             :     }
    1575             :     else
    1576             :     {
                               // cacheSimpleLSCV() is only called when the script is not cached
                               // yet.
    1577           0 :         if (mbCachedScript || cacheSimpleLSCV())
    1578           0 :             aScript = maCachedScript;
    1579             :     }
    1580           1 :     return aScript;
    1581             : }
    1582             : 
    1583             : 
    1584      483757 : OUString LanguageTagImpl::getRegionFromLangtag()
    1585             : {
                           // Returns the region subtag after synCanonicalize(). With an
                           // lt_tag_t present it is read from liblangtag, otherwise from the
                           // cached simple extraction. Returns an empty string if maBcp47 is
                           // empty or no region is available.
    1586      483757 :     OUString aRegion;
    1587      483757 :     synCanonicalize();
    1588      483757 :     if (maBcp47.isEmpty())
    1589           0 :         return aRegion;
    1590      483757 :     if (mpImplLangtag)
    1591             :     {
    1592          74 :         const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
    1593             :         // pRegionT==NULL is valid for language only tags, rough check here
    1594             :         // that does not take sophisticated tags into account that actually
    1595             :         // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
    1596             :         // that ll-CC and lll-CC actually fail.
    1597             :         SAL_WARN_IF( !pRegionT &&
    1598             :                 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
    1599             :                 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
    1600             :                 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
    1601          74 :         if (!pRegionT)
    1602          71 :             return aRegion;
    1603           3 :         const char* pRegion = lt_region_get_tag( pRegionT);
                               // NOTE(review): the message below lacks a space before the
                               // opening quote, unlike the other warnings in this function.
    1604             :         SAL_WARN_IF( !pRegion, "i18nlangtag",
    1605             :                 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
    1606           3 :         if (pRegion)
    1607           3 :             aRegion = OUString::createFromAscii( pRegion);
    1608             :     }
    1609             :     else
    1610             :     {
                               // cacheSimpleLSCV() is only called when the country is not
                               // cached yet.
    1611      483683 :         if (mbCachedCountry || cacheSimpleLSCV())
    1612      483680 :             aRegion = maCachedCountry;
    1613             :     }
    1614      483686 :     return aRegion;
    1615             : }
    1616             : 
    1617             : 
    1618           2 : OUString LanguageTagImpl::getVariantsFromLangtag()
    1619             : {
                           // Returns all variant subtags joined with '-' after
                           // synCanonicalize(). With an lt_tag_t present they are read from
                           // liblangtag's variant list, otherwise from the cached simple
                           // extraction. Returns an empty string if there are none.
    1620           2 :     OUString aVariants;
    1621           2 :     synCanonicalize();
    1622           2 :     if (maBcp47.isEmpty())
    1623           0 :         return aVariants;
    1624           2 :     if (mpImplLangtag)
    1625             :     {
    1626           2 :         const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
    1627           3 :         for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
    1628             :         {
    1629           1 :             const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
    1630           1 :             if (pVariantT)
    1631             :             {
    1632           1 :                 const char* p = lt_variant_get_tag( pVariantT);
    1633           1 :                 if (p)
    1634             :                 {
                                           // The first variant is taken as is, later ones are
                                           // appended with a '-' separator.
    1635           1 :                     if (aVariants.isEmpty())
    1636           1 :                         aVariants = OUString::createFromAscii( p);
    1637             :                     else
    1638           0 :                         aVariants += "-" + OUString::createFromAscii( p);
    1639             :                 }
    1640             :             }
    1641             :         }
    1642             :     }
    1643             :     else
    1644             :     {
    1645           0 :         if (mbCachedVariants || cacheSimpleLSCV())
    1646           0 :             aVariants = maCachedVariants;
    1647             :     }
    1648           2 :     return aVariants;
    1649             : }
    1650             : 
    1651             : 
    1652  2392677436 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
    1653             : {
                           // Returns the locale. For a system locale with bResolveSystem false,
                           // the shared empty locale is returned. Otherwise the cached value is
                           // refreshed from the impl, and only if it is still uninitialized is
                           // it converted from the BCP 47 string or, failing that, the LangID.
    1654  2392677436 :     if (!bResolveSystem && mbSystemLocale)
    1655         575 :         return theEmptyLocale::get();
    1656  2392676861 :     if (!mbInitializedLocale)
    1657  1314038604 :         syncVarsFromImpl();
    1658  2392676861 :     if (!mbInitializedLocale)
    1659             :     {
    1660           0 :         if (mbInitializedBcp47)
    1661           0 :             const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
    1662             :         else
    1663           0 :             const_cast<LanguageTag*>(this)->convertLangToLocale();
    1664             :     }
    1665  2392676861 :     return maLocale;
    1666             : }
    1667             : 
    1668             : 
    // Return this language tag as a LanguageType (mnLangID).
    //
    // bResolveSystem: if false and this tag is flagged as the system locale
    //                 (mbSystemLocale), LANGUAGE_SYSTEM is returned.
    //
    // Otherwise mnLangID is returned, initialized on demand: first through
    // syncVarsFromImpl(), and if mbInitializedLangID is still unset after
    // that, through convertBcp47ToLang() (when mbInitializedBcp47 is set) or
    // convertLocaleToLang(). In the latter case synCanonicalize() is called
    // additionally if the conversion yielded LANGUAGE_DONTKNOW, or yielded
    // LANGUAGE_SYSTEM for a tag that is not the system locale.
    1669  1457853028 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
    1670             : {
    1671  1457853028 :     if (!bResolveSystem && mbSystemLocale)
    1672     2154989 :         return LANGUAGE_SYSTEM;
    1673  1455698039 :     if (!mbInitializedLangID)
    1674  1214209087 :         syncVarsFromImpl();
    1675  1455698039 :     if (!mbInitializedLangID)
    1676             :     {
    1677           0 :         if (mbInitializedBcp47)
    1678           0 :             const_cast<LanguageTag*>(this)->convertBcp47ToLang();
    1679             :         else
    1680             :         {
    1681           0 :             const_cast<LanguageTag*>(this)->convertLocaleToLang();
    1682             : 
    1683             :             /* Resolve a locale only unknown due to some redundant information,
    1684             :              * like 'de-Latn-DE' with script tag. Never call canonicalize()
    1685             :              * from within convert...() methods due to possible recursion, so
    1686             :              * do it here. */
    1687           0 :             if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
    1688           0 :                 const_cast<LanguageTag*>(this)->synCanonicalize();
    1689             :         }
    1690             :     }
    1691  1455698039 :     return mnLangID;
    1692             : }
    1693             : 
    1694             : 
    // Fill rLanguage, rScript and rCountry from this tag.
    //
    // If isIsoODF() is true the values of getLanguage(), getScript() and
    // getCountry() are assigned unchanged. Otherwise each component is
    // assigned only if it passes the matching isIsoLanguage() / isIsoScript()
    // / isIsoCountry() check, and is set to an empty string if it does not.
    1695           0 : void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
    1696             : {
    1697             :     // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
    1698             :     // and getCountry() to work correctly in this context.
    1699           0 :     if (isIsoODF())
    1700             :     {
    1701           0 :         rLanguage = getLanguage();
    1702           0 :         rScript   = getScript();
    1703           0 :         rCountry  = getCountry();
    1704             :     }
    1705             :     else
    1706             :     {
                               // Filter each component individually; the getter is evaluated
                               // once for the check and once more for the assignment.
    1707           0 :         rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
    1708           0 :         rScript   = (LanguageTag::isIsoScript(   getScript())   ? getScript()   : OUString());
    1709           0 :         rCountry  = (LanguageTag::isIsoCountry(  getCountry())  ? getCountry()  : OUString());
    1710             :     }
    1711           0 : }
    1712             : 
    1713             : 
    // File-local ASCII case predicates used by the isIso...() checks below.
    1714             : namespace
    1715             : {
    1716             : 
                       // True if c is one of the ASCII letters 'a'..'z'.
    1717      640977 : inline bool isLowerAscii( sal_Unicode c )
    1718             : {
    1719      640977 :     return 'a' <= c && c <= 'z';
    1720             : }
    1721             : 
                       // True if c is one of the ASCII letters 'A'..'Z'.
    1722      536174 : inline bool isUpperAscii( sal_Unicode c )
    1723             : {
    1724      536174 :     return 'A' <= c && c <= 'Z';
    1725             : }
    1726             : 
    1727             : }
    1728             : 
    1729             : 
    1730             : // static
    // Check whether rLanguage has the shape of an ISO language code as accepted
    // here: exactly 2 or exactly 3 characters, all lower case ASCII letters.
    // Any other string, including the empty string, yields false.
    // Matching is case sensitive; a warning is logged when a 2 or 3 character
    // string is rejected and contains an upper case ASCII letter in the
    // positions tested by the SAL_WARN_IF condition.
    1731      276530 : bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
    1732             : {
    1733             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1734             :     bool b2chars;
                           // b2chars is assigned inside the condition; the third character is
                           // only inspected when the length is 3.
    1735      917539 :     if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
    1736     1106045 :             isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
    1737       87949 :             (b2chars || isLowerAscii( rLanguage[2])))
    1738      276505 :         return true;
    1739             :     SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
    1740             :                 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
    1741             :             (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
    1742             :             "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
    1743          25 :     return false;
    1744             : }
    1745             : 
    1746             : 
    1747             : // static
    // Check whether rRegion is acceptable as an ISO country code here: either
    // the empty string, or exactly 2 upper case ASCII letters. Anything else
    // yields false. Matching is case sensitive; a warning is logged when a
    // 2 character string is rejected and contains a lower case ASCII letter.
    1748      300809 : bool LanguageTag::isIsoCountry( const OUString& rRegion )
    1749             : {
    1750             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1751      869702 :     if (rRegion.isEmpty() ||
    1752      536171 :             (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
    1753      300806 :         return true;
    1754             :     SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
    1755             :             "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
    1756           3 :     return false;
    1757             : }
    1758             : 
    1759             : 
    1760             : // static
    // Check whether rScript is acceptable as an ISO script code here: either
    // the empty string, or exactly 4 ASCII letters of which the first is upper
    // case and the remaining three are lower case (e.g. "Latn"). Anything else
    // yields false. Matching is case sensitive; a warning is logged when a
    // 4 character string is rejected and has a letter of the wrong case.
    1761        5056 : bool LanguageTag::isIsoScript( const OUString& rScript )
    1762             : {
    1763             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1764       10118 :     if (rScript.isEmpty() ||
    1765          12 :             (rScript.getLength() == 4 &&
    1766          18 :              isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
    1767          12 :              isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
    1768        5056 :         return true;
    1769             :     SAL_WARN_IF( rScript.getLength() == 4 &&
    1770             :             (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
    1771             :              isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
    1772             :             "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
    1773           0 :     return false;
    1774             : }
    1775             : 
    1776             : 
    // Return the language subtag, computing it lazily.
    // On the first call (mbCachedLanguage unset) the value is obtained from
    // getLanguageFromLangtag() and stored in maCachedLanguage; later calls
    // return the stored value.
    1777      386620 : OUString LanguageTagImpl::getLanguage() const
    1778             : {
    1779      386620 :     if (!mbCachedLanguage)
    1780             :     {
    1781      293059 :         maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
    1782      293059 :         mbCachedLanguage = true;
    1783             :     }
    1784      386620 :     return maCachedLanguage;
    1785             : }
    1786             : 
    1787             : 
    // Return the language subtag of this tag, as provided by the impl object.
    // If the impl already has the value cached it is returned directly.
    // Otherwise the impl computes it and syncFromImpl() is called afterwards.
    // NOTE(review): presumably the sync picks up state the impl changed while
    // computing the value - confirm against syncFromImpl().
    1788     2232952 : OUString LanguageTag::getLanguage() const
    1789             : {
    1790     2232952 :     ImplPtr pImpl = getImpl();
    1791     2232952 :     if (pImpl->mbCachedLanguage)
    1792     2122864 :         return pImpl->maCachedLanguage;
    1793      220176 :     OUString aRet( pImpl->getLanguage());
    1794      110088 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1795     2343040 :     return aRet;
    1796             : }
    1797             : 
    1798             : 
    // Return the script subtag, computing it lazily.
    // On the first call (mbCachedScript unset) the value is obtained from
    // getScriptFromLangtag() and stored in maCachedScript; later calls return
    // the stored value.
    1799        5056 : OUString LanguageTagImpl::getScript() const
    1800             : {
    1801        5056 :     if (!mbCachedScript)
    1802             :     {
    1803          18 :         maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
    1804          18 :         mbCachedScript = true;
    1805             :     }
    1806        5056 :     return maCachedScript;
    1807             : }
    1808             : 
    1809             : 
    // Return the script subtag of this tag, as provided by the impl object.
    // If the impl already has the value cached it is returned directly.
    // Otherwise the impl computes it and syncFromImpl() is called afterwards.
    // NOTE(review): presumably the sync picks up state the impl changed while
    // computing the value - confirm against syncFromImpl().
    1810       63485 : OUString LanguageTag::getScript() const
    1811             : {
    1812       63485 :     ImplPtr pImpl = getImpl();
    1813       63485 :     if (pImpl->mbCachedScript)
    1814       63485 :         return pImpl->maCachedScript;
    1815           0 :     OUString aRet( pImpl->getScript());
    1816           0 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1817       63485 :     return aRet;
    1818             : }
    1819             : 
    1820             : 
    // Return getLanguage(), followed by "-" and getScript() if the script is
    // not empty. With an empty script only the language is returned.
    1821        7525 : OUString LanguageTag::getLanguageAndScript() const
    1822             : {
    1823        7525 :     OUString aLanguageScript( getLanguage());
    1824       15050 :     OUString aScript( getScript());
    1825        7525 :     if (!aScript.isEmpty())
    1826             :     {
    1827          17 :         aLanguageScript += "-" + aScript;
    1828             :     }
    1829       15050 :     return aLanguageScript;
    1830             : }
    1831             : 
    1832             : 
    // Return the country, computing it lazily.
    // On the first call (mbCachedCountry unset) the region is obtained from
    // getRegionFromLangtag(); if it does not pass LanguageTag::isIsoCountry()
    // the cached value is cleared, so a non-ISO region yields an empty
    // country. Later calls return the stored value. Compare getRegion(),
    // which returns the region without this filtering.
    1833       29327 : OUString LanguageTagImpl::getCountry() const
    1834             : {
    1835       29327 :     if (!mbCachedCountry)
    1836             :     {
    1837       24304 :         maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
    1838       24304 :         if (!LanguageTag::isIsoCountry( maCachedCountry))
    1839           1 :             maCachedCountry.clear();
    1840       24304 :         mbCachedCountry = true;
    1841             :     }
    1842       29327 :     return maCachedCountry;
    1843             : }
    1844             : 
    1845             : 
    // Return the country of this tag, as provided by the impl object.
    // If the impl already has the value cached it is returned directly.
    // Otherwise the impl computes it and syncFromImpl() is called afterwards.
    // NOTE(review): presumably the sync picks up state the impl changed while
    // computing the value - confirm against syncFromImpl().
    1846      650659 : OUString LanguageTag::getCountry() const
    1847             : {
    1848      650659 :     ImplPtr pImpl = getImpl();
    1849      650659 :     if (pImpl->mbCachedCountry)
    1850      650432 :         return pImpl->maCachedCountry;
    1851         454 :     OUString aRet( pImpl->getCountry());
    1852         227 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1853      650886 :     return aRet;
    1854             : }
    1855             : 
    1856             : 
    // Return the region exactly as getRegionFromLangtag() delivers it.
    // Unlike getCountry() the result is neither cached here nor checked with
    // isIsoCountry().
    1857      276505 : OUString LanguageTagImpl::getRegion() const
    1858             : {
    1859      276505 :     return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
    1860             : }
    1861             : 
    1862             : 
    // Return the variants string, computing it lazily.
    // On the first call (mbCachedVariants unset) the value is obtained from
    // getVariantsFromLangtag() and stored in maCachedVariants; later calls
    // return the stored value.
    1863           6 : OUString LanguageTagImpl::getVariants() const
    1864             : {
    1865           6 :     if (!mbCachedVariants)
    1866             :     {
    1867           2 :         maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
    1868           2 :         mbCachedVariants = true;
    1869             :     }
    1870           6 :     return maCachedVariants;
    1871             : }
    1872             : 
    1873             : 
    // Return the variants of this tag, as provided by the impl object.
    // If the impl already has the value cached it is returned directly.
    // Otherwise the impl computes it and syncFromImpl() is called afterwards.
    // NOTE(review): presumably the sync picks up state the impl changed while
    // computing the value - confirm against syncFromImpl().
    1874       59178 : OUString LanguageTag::getVariants() const
    1875             : {
    1876       59178 :     ImplPtr pImpl = getImpl();
    1877       59178 :     if (pImpl->mbCachedVariants)
    1878       59178 :         return pImpl->maCachedVariants;
    1879           0 :     OUString aRet( pImpl->getVariants());
    1880           0 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1881       59178 :     return aRet;
    1882             : }
    1883             : 
    1884             : 
    // Build a glibc style locale string from this tag.
    //
    // rEncoding: appended verbatim after the language / country part; no
    //            separator is inserted by this function.
    //
    // If isIsoLocale() is true the result is "language" + rEncoding when the
    // country is empty, else "language_COUNTRY" + rEncoding. For any tag that
    // is not an ISO locale an empty string is returned (see FIXME below).
    1885           3 : OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
    1886             : {
    1887           3 :     OUString aRet;
    1888           3 :     if (isIsoLocale())
    1889             :     {
    1890           3 :         OUString aCountry( getCountry());
    1891           3 :         if (aCountry.isEmpty())
    1892           0 :             aRet = getLanguage() + rEncoding;
    1893             :         else
    1894           3 :             aRet = getLanguage() + "_" + aCountry + rEncoding;
    1895             :     }
    1896             :     else
    1897             :     {
    1898             :         /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
    1899             :          * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
    1900             :          * So far no code was prepared for anything else than a simple
    1901             :          * language_country locale so we don't lose anything here right now.
    1902             :          * */
    1903             :     }
    1904           3 :     return aRet;
    1905             : }
    1906             : 
    1907             : 
    // True if the script subtag is not empty.
    // getScript() is called only for its caching side effect when the script
    // has not been cached yet; its return value is discarded and
    // maCachedScript is inspected instead.
    1908       65129 : bool LanguageTagImpl::hasScript() const
    1909             : {
    1910       65129 :     if (!mbCachedScript)
    1911           0 :         getScript();
    1912       65129 :     return !maCachedScript.isEmpty();
    1913             : }
    1914             : 
    1915             : 
    // True if this tag has a non-empty script, as determined by the impl
    // object. syncFromImpl() is called unconditionally after querying the
    // impl.
    1916       65129 : bool LanguageTag::hasScript() const
    1917             : {
    1918       65129 :     bool bRet = getImpl()->hasScript();
    1919       65129 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1920       65129 :     return bRet;
    1921             : }
    1922             : 
    1923             : 
    // Try to fill all four cached subtags (language, script, country,
    // variants) in one go by running simpleExtract() on maBcp47.
    //
    // Returns true if simpleExtract() reported EXTRACTED_LSC or EXTRACTED_LV;
    // in that case the four maCached... members are assigned and all four
    // mbCached... flags are set. For any other result nothing is cached and
    // false is returned.
    1924      313876 : bool LanguageTagImpl::cacheSimpleLSCV()
    1925             : {
    1926      627752 :     OUString aLanguage, aScript, aCountry, aVariants;
    1927      313876 :     Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
    1928      313876 :     bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
    1929      313876 :     if (bRet)
    1930             :     {
    1931      313871 :         maCachedLanguage = aLanguage;
    1932      313871 :         maCachedScript   = aScript;
    1933      313871 :         maCachedCountry  = aCountry;
    1934      313871 :         maCachedVariants = aVariants;
    1935      313871 :         mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
    1936             :     }
    1937      627752 :     return bRet;
    1938             : }
    1939             : 
    1940             : 
    // True if this tag is a plain ISO language[-country] locale.
    //
    // The decision is computed once and kept in meIsIsoLocale. While it is
    // DECISION_DONTKNOW, synCanonicalize() is called and the decision becomes
    // DECISION_YES if maBcp47 is empty, or if maBcp47 has at most 6
    // characters, getLanguage() passes isIsoLanguage() and getRegion() passes
    // isIsoCountry(); otherwise DECISION_NO.
    1941      778944 : bool LanguageTagImpl::isIsoLocale() const
    1942             : {
    1943      778944 :     if (meIsIsoLocale == DECISION_DONTKNOW)
    1944             :     {
    1945      300569 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    1946             :         // It must be at most ll-CC or lll-CC
    1947             :         // Do not use getCountry() here, use getRegion() instead.
    1948      601138 :         meIsIsoLocale = ((maBcp47.isEmpty() ||
    1949     1707156 :                     (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
    1950     1454689 :                      LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
    1951             :     }
    1952      778944 :     return meIsIsoLocale == DECISION_YES;
    1953             : }
    1954             : 
    1955             : 
    // True if this tag is a plain ISO language[-country] locale, as determined
    // by the impl object. syncFromImpl() is called unconditionally after
    // querying the impl.
    1956      566874 : bool LanguageTag::isIsoLocale() const
    1957             : {
    1958      566874 :     bool bRet = getImpl()->isIsoLocale();
    1959      566874 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1960      566874 :     return bRet;
    1961             : }
    1962             : 
    1963             : 
    // True if this tag consists only of ISO language, optional ISO script and
    // optional ISO country, without variants.
    //
    // The decision is computed once and kept in meIsIsoODF. While it is
    // DECISION_DONTKNOW, synCanonicalize() is called and then:
    //  - a script that fails isIsoScript() gives DECISION_NO;
    //  - a tag for which isIsoLocale() holds gives DECISION_YES;
    //  - otherwise DECISION_YES requires maBcp47 of at most 11 characters,
    //    ISO language, ISO region, ISO script and empty variants.
    1964        5381 : bool LanguageTagImpl::isIsoODF() const
    1965             : {
    1966        5381 :     if (meIsIsoODF == DECISION_DONTKNOW)
    1967             :     {
    1968        5050 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    1969        5050 :         if (!LanguageTag::isIsoScript( getScript()))
    1970             :         {
    1971           0 :             meIsIsoODF = DECISION_NO;
    1972           0 :             return false;
    1973             :         }
    1974             :         // The usual case is lll-CC so simply check that first.
    1975        5050 :         if (isIsoLocale())
    1976             :         {
    1977        5029 :             meIsIsoODF = DECISION_YES;
    1978        5029 :             return true;
    1979             :         }
    1980             :         // If this is not ISO locale for which script must not exist it can
    1981             :         // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
    1982             :         // ll-vvvvvvvv
                               // NOTE(review): the isIsoScript() test below repeats the check
                               // already done for the early return above.
    1983          94 :         meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
    1984          53 :                     LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
    1985          59 :                     getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
    1986             :     }
    1987         352 :     return meIsIsoODF == DECISION_YES;
    1988             : }
    1989             : 
    1990             : 
    // Result of isIsoODF() of the impl object. syncFromImpl() is called
    // unconditionally after querying the impl.
    1991         353 : bool LanguageTag::isIsoODF() const
    1992             : {
    1993         353 :     bool bRet = getImpl()->isIsoODF();
    1994         353 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1995         353 :     return bRet;
    1996             : }
    1997             : 
    1998             : 
    // True if meIsValid is DECISION_YES.
    // meIsValid is not computed here: if it is still DECISION_DONTKNOW,
    // synCanonicalize() is called, which is expected to set it; a warning is
    // logged if it is still undecided afterwards, and false is returned in
    // that case.
    1999        5476 : bool LanguageTagImpl::isValidBcp47() const
    2000             : {
    2001        5476 :     if (meIsValid == DECISION_DONTKNOW)
    2002             :     {
    2003           0 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    2004             :         SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
    2005             :                 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
    2006             :     }
    2007        5476 :     return meIsValid == DECISION_YES;
    2008             : }
    2009             : 
    2010             : 
    // Result of isValidBcp47() of the impl object. syncFromImpl() is called
    // unconditionally after querying the impl.
    2011         401 : bool LanguageTag::isValidBcp47() const
    2012             : {
    2013         401 :     bool bRet = getImpl()->isValidBcp47();
    2014         401 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    2015         401 :     return bRet;
    2016             : }
    2017             : 
    2018             : 
    2019             : 
    2020             : 
    // Replace this tag by its fallback locale, at most once per object state.
    //
    // Does nothing if mbIsFallback is already set. Otherwise the resolved
    // locale is looked up with MsLangId::Conversion::lookupFallbackLocale();
    // if the result differs in Language, Country or Variant, this tag is
    // reset() to the fallback. Finally mbIsFallback is set.
    //
    // Returns *this.
    2021        1926 : LanguageTag & LanguageTag::makeFallback()
    2022             : {
    2023        1926 :     if (!mbIsFallback)
    2024             :     {
    2025        1926 :         const lang::Locale& rLocale1 = getLocale( true);
    2026        1926 :         lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
    2027        5778 :         if (    rLocale1.Language != aLocale2.Language ||
    2028        3813 :                 rLocale1.Country  != aLocale2.Country ||
    2029        1887 :                 rLocale1.Variant  != aLocale2.Variant)
    2030             :         {
    2031          39 :             if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
    2032             :             {
    2033             :                 // "en-US" is the last resort fallback, try if we get a better
    2034             :                 // one for the fallback hierarchy of a non-"en" locale.
                                       // The full tag itself is excluded (argument false). If no entry
                                       // yields something other than en-US, aLocale2 holds the lookup
                                       // result of the last entry tried.
    2035           0 :                 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
    2036           0 :                 for (::std::vector< OUString >::const_iterator it( aFallbacks.begin()); it != aFallbacks.end(); ++it)
    2037             :                 {
    2038           0 :                     lang::Locale aLocale3( LanguageTag( *it).getLocale());
    2039           0 :                     aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
    2040           0 :                     if (aLocale2.Language != "en" || aLocale2.Country != "US")
    2041           0 :                         break;  // for, success
    2042           0 :                 }
    2043             :             }
    2044             :             SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
    2045             :                     rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
    2046             :                     aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
    2047          39 :             reset( aLocale2);
    2048             :         }
    2049        1926 :         mbIsFallback = true;
    2050             :     }
    2051        1926 :     return *this;
    2052             : }
    2053             : 
    2054             : 
    2055             : /* TODO: maybe this now could take advantage of the mnOverride field in
    2056             :  * isolang.cxx entries and search for kSAME instead of hardcoded special
    2057             :  * fallbacks. Though iterating through those tables would be slower and even
    2058             :  * then there would be some special cases, but we wouldn't lack entries that
    2059             :  * were missed out. */
    2060      438357 : ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
    2061             : {
    2062      438357 :     ::std::vector< OUString > aVec;
    2063      876714 :     OUString aLanguage( getLanguage());
    2064      876714 :     OUString aCountry( getCountry());
    2065      438357 :     if (isIsoLocale())
    2066             :     {
    2067      379181 :         if (!aCountry.isEmpty())
    2068             :         {
    2069      325180 :             if (bIncludeFullBcp47)
    2070      305818 :                 aVec.push_back( aLanguage + "-" + aCountry);
    2071      325180 :             if (aLanguage == "zh")
    2072             :             {
    2073             :                 // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
    2074             :                 // list zh-CN.
    2075        3864 :                 if (aCountry == "HK" || aCountry == "MO")
    2076        1288 :                     aVec.push_back( aLanguage + "-TW");
    2077        2576 :                 else if (aCountry != "CN")
    2078        1288 :                     aVec.push_back( aLanguage + "-CN");
    2079        3864 :                 aVec.push_back( aLanguage);
    2080             :             }
    2081      321316 :             else if (aLanguage == "sh")
    2082             :             {
    2083             :                 // Manual list instead of calling
    2084             :                 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
    2085             :                 // that would also include "sh-*" again.
    2086           0 :                 aVec.push_back( "sr-Latn-" + aCountry);
    2087           0 :                 aVec.push_back( "sr-Latn");
    2088           0 :                 aVec.push_back( "sh");  // legacy with script, before default script with country
    2089           0 :                 aVec.push_back( "sr-" + aCountry);
    2090           0 :                 aVec.push_back( "sr");
    2091             :             }
    2092      321316 :             else if (aLanguage == "ca" && aCountry == "XV")
    2093             :             {
    2094           0 :                 ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
    2095           0 :                 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
    2096             :                 // Already includes 'ca' language fallback.
    2097             :             }
    2098      321316 :             else if (aLanguage == "ku")
    2099             :             {
    2100           0 :                 if (aCountry == "TR" || aCountry == "SY")
    2101             :                 {
    2102           0 :                     aVec.push_back( "kmr-Latn-" + aCountry);
    2103           0 :                     aVec.push_back( "kmr-" + aCountry);
    2104           0 :                     aVec.push_back( "kmr-Latn");
    2105           0 :                     aVec.push_back( "kmr");
    2106           0 :                     aVec.push_back( aLanguage);
    2107             :                 }
    2108           0 :                 else if (aCountry == "IQ" || aCountry == "IR")
    2109             :                 {
    2110           0 :                     aVec.push_back( "ckb-" + aCountry);
    2111           0 :                     aVec.push_back( "ckb");
    2112             :                 }
    2113             :             }
    2114      321316 :             else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
    2115             :             {
    2116           0 :                 aVec.push_back( "ku-Latn-" + aCountry);
    2117           0 :                 aVec.push_back( "ku-" + aCountry);
    2118           0 :                 aVec.push_back( aLanguage);
    2119           0 :                 aVec.push_back( "ku");
    2120             :             }
    2121      321316 :             else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
    2122             :             {
    2123        3864 :                 aVec.push_back( "ku-Arab-" + aCountry);
    2124        3864 :                 aVec.push_back( "ku-" + aCountry);
    2125        3864 :                 aVec.push_back( aLanguage);
    2126             :                 // not 'ku' only, that was used for Latin script
    2127             :             }
    2128             :             else
    2129      317452 :                 aVec.push_back( aLanguage);
    2130             :         }
    2131             :         else
    2132             :         {
    2133       54001 :             if (bIncludeFullBcp47)
    2134       53999 :                 aVec.push_back( aLanguage);
    2135       54001 :             if (aLanguage == "sh")
    2136             :             {
    2137           0 :                 aVec.push_back( "sr-Latn");
    2138           0 :                 aVec.push_back( "sr");
    2139             :             }
    2140       54001 :             else if (aLanguage == "pli")
    2141             :             {
    2142             :                 // a special case for Pali dictionary, see fdo#41599
    2143           0 :                 aVec.push_back( "pi-Latn");
    2144           0 :                 aVec.push_back( "pi");
    2145             :             }
    2146             :         }
    2147      379181 :         return aVec;
    2148             :     }
    2149             : 
    2150       59176 :     getBcp47();     // have maBcp47 now
    2151       59176 :     if (bIncludeFullBcp47)
    2152       59176 :         aVec.push_back( maBcp47);
    2153      118352 :     OUString aScript;
    2154      118352 :     OUString aVariants( getVariants());
    2155      118352 :     OUString aTmp;
    2156       59176 :     if (hasScript())
    2157             :     {
    2158       55951 :         aScript = getScript();
    2159       55951 :         bool bHaveLanguageScriptVariant = false;
    2160       55951 :         if (!aCountry.isEmpty())
    2161             :         {
    2162       37655 :             if (!aVariants.isEmpty())
    2163             :             {
    2164           0 :                 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
    2165           0 :                 if (aTmp != maBcp47)
    2166           0 :                     aVec.push_back( aTmp);
    2167             :                 // Language with variant but without country before language
    2168             :                 // without variant but with country.
    2169           0 :                 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
    2170           0 :                 if (aTmp != maBcp47)
    2171           0 :                     aVec.push_back( aTmp);
    2172           0 :                 bHaveLanguageScriptVariant = true;
    2173             :             }
    2174       37655 :             aTmp = aLanguage + "-" + aScript + "-" + aCountry;
    2175       37655 :             if (aTmp != maBcp47)
    2176           0 :                 aVec.push_back( aTmp);
    2177       37655 :             if (aLanguage == "sr" && aScript == "Latn")
    2178             :             {
    2179             :                 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
    2180        9018 :                 if (aCountry == "CS")
    2181             :                 {
    2182        2160 :                     aVec.push_back( "sr-Latn-YU");
    2183        2160 :                     aVec.push_back( "sh-CS");
    2184        2160 :                     aVec.push_back( "sh-YU");
    2185             :                 }
    2186             :                 else
    2187        6858 :                     aVec.push_back( "sh-" + aCountry);
    2188             :             }
    2189       28637 :             else if (aLanguage == "pi" && aScript == "Latn")
    2190           0 :                 aVec.push_back( "pli");     // a special case for Pali dictionary, see fdo#41599
    2191       28637 :             else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
    2192           0 :                 aVec.push_back( "ku-" + aCountry);
    2193             :         }
    2194       55951 :         if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
    2195             :         {
    2196           0 :             aTmp = aLanguage + "-" + aScript + "-" + aVariants;
    2197           0 :             if (aTmp != maBcp47)
    2198           0 :                 aVec.push_back( aTmp);
    2199             :         }
    2200       55951 :         aTmp = aLanguage + "-" + aScript;
    2201       55951 :         if (aTmp != maBcp47)
    2202       37655 :             aVec.push_back( aTmp);
    2203             : 
    2204             :         // 'sh' actually denoted a script, so have it here instead of appended
    2205             :         // at the end as language-only.
    2206       55951 :         if (aLanguage == "sr" && aScript == "Latn")
    2207       10950 :             aVec.push_back( "sh");
    2208       45001 :         else if (aLanguage == "ku" && aScript == "Arab")
    2209           0 :             aVec.push_back( "ckb");
    2210             :         // 'ku' only denoted Latin script
    2211       45001 :         else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
    2212           0 :             aVec.push_back( "ku");
    2213             :     }
    2214       59176 :     bool bHaveLanguageVariant = false;
    2215       59176 :     if (!aCountry.isEmpty())
    2216             :     {
    2217       40878 :         if (!aVariants.isEmpty())
    2218             :         {
    2219        3223 :             aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
    2220        3223 :             if (aTmp != maBcp47)
    2221           0 :                 aVec.push_back( aTmp);
    2222        3223 :             if (maBcp47 == "ca-ES-valencia")
    2223        2578 :                 aVec.push_back( "ca-XV");
    2224             :             // Language with variant but without country before language
    2225             :             // without variant but with country.
    2226             :             // But only if variant is not from a grandfathered tag that
    2227             :             // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
    2228             :             // not.
    2229        6446 :             if (aVariants.getLength() >= 5 ||
    2230         645 :                     (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
    2231             :             {
    2232        2578 :                 aTmp = aLanguage + "-" + aVariants;
    2233        2578 :                 if (aTmp != maBcp47)
    2234        2578 :                     aVec.push_back( aTmp);
    2235        2578 :                 bHaveLanguageVariant = true;
    2236             :             }
    2237             :         }
    2238       40878 :         aTmp = aLanguage + "-" + aCountry;
    2239       40878 :         if (aTmp != maBcp47)
    2240       40878 :             aVec.push_back( aTmp);
    2241             :     }
    2242       59176 :     if (!aVariants.isEmpty() && !bHaveLanguageVariant)
    2243             :     {
    2244             :         // Only if variant is not from a grandfathered tag that wouldn't match
    2245             :         // the rules, i.e. "de-1901" is fine but "en-oed" is not.
    2246        1295 :         if (aVariants.getLength() >= 5 ||
    2247         647 :                 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
    2248             :         {
    2249           2 :             aTmp = aLanguage + "-" + aVariants;
    2250           2 :             if (aTmp != maBcp47)
    2251           0 :                 aVec.push_back( aTmp);
    2252             :         }
    2253             :     }
    2254             : 
    2255             :     // Insert legacy fallbacks with country before language-only, but only
    2256             :     // default script, script was handled already above.
    2257       59176 :     if (!aCountry.isEmpty())
    2258             :     {
    2259       40878 :         if (aLanguage == "sr" && aCountry == "CS")
    2260        2160 :             aVec.push_back( "sr-YU");
    2261             :     }
    2262             : 
    2263             :     // Original language-only.
    2264       59176 :     if (aLanguage != maBcp47)
    2265       59176 :         aVec.push_back( aLanguage);
    2266             : 
    2267       59176 :     return aVec;
    2268             : }
    2269             : 
    2270             : 
    2271           0 : bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
    2272             : {
                           // Equality test with optional resolution of the SYSTEM locale.
                           // Only when bResolveSystem is set and exactly one of the two tags
                           // is the system locale are the resolved BCP 47 strings compared;
                           // in every other case the result is that of operator==().

    2273             :     // If SYSTEM is not to be resolved, or both tags are SYSTEM, or neither
    2274             :     // is, the cheaper operator==() gives the same answer.
    2275           0 :     if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
    2276           0 :         return operator==( rLanguageTag);
    2277             : 
    2278             :     // Exactly one side is SYSTEM: compare the full resolved tag strings.
    2279           0 :     return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
    2280             : }
    2281             : 
    2282             : 
    2283    87289987 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
    2284             : {
                           // Equality without resolving the SYSTEM locale. The checks run from
                           // cheapest to most expensive: both SYSTEM, then already initialized
                           // Lang-IDs, and only as a last resort the BCP 47 strings.

    2285    87289987 :     if (isSystemLocale() && rLanguageTag.isSystemLocale())
    2286      234237 :         return true;    // both SYSTEM
    2287             : 
    2288             :     // No need to convert to BCP47 if both Lang-IDs are available.
    2289    87055750 :     if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
    2290             :     {
    2291             :         // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
    2292     1671664 :         return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
    2293             :     }
    2294             : 
    2295             :     // Compare full language tag strings but SYSTEM unresolved.
    2296    85384086 :     return getBcp47( false) == rLanguageTag.getBcp47( false);
    2297             : }
    2298             : 
    2299             : 
    2300    87279743 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
    2301             : {
                           // Exact negation of operator==(), so SYSTEM is not resolved here either.
    2302    87279743 :     return !operator==( rLanguageTag);
    2303             : }
    2304             : 
    2305             : 
    2306          87 : bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
    2307             : {
                           // Ordering by the BCP 47 strings with SYSTEM left unresolved,
                           // compared while ignoring ASCII case.
    2308          87 :     return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
    2309             : }
    2310             : 
    2311             : 
    2312             : // static
                       // Splits rBcp47 into its subtags by looking only at the total length and
                       // at the positions of the first four hyphens; nothing but string
                       // operations on rBcp47 is involved.
                       //
                       // Recognized shapes and the value returned for them:
                       //   the single-character joker       -> EXTRACTED_X_JOKER (outputs untouched)
                       //   x-... privateuse                 -> EXTRACTED_X       (outputs untouched)
                       //   ll[l], ll[l]-CC, ll[l]-Ssss,
                       //   ll[l]-Ssss-CC                    -> EXTRACTED_LSC
                       //   any shape carrying variants,
                       //   and the grandfathered en-GB-oed  -> EXTRACTED_LV
                       //   anything else                    -> EXTRACTED_NONE, all outputs cleared
                       //
                       // Language is lower-cased, script gets an upper-case first letter followed
                       // by lower case, country is upper-cased. Variants are copied verbatim from
                       // rBcp47 starting at the first variant and running to the end of the
                       // string, so they may contain further hyphen-separated subtags.
    2313      533795 : LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
    2314             :         OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
    2315             : {
    2316      533795 :     Extraction eRet = EXTRACTED_NONE;
    2317      533795 :     const sal_Int32 nLen = rBcp47.getLength();
                           // Positions of the first four hyphens, -1 for each one that is absent.
    2318      533795 :     const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
    2319      533795 :     sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
    2320      533795 :     sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
    2321      533795 :     sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
    2322      533795 :     if (nLen == 1 && rBcp47[0] == '*')              // * the joker (wildcard)
    2323             :     {
    2324             :         // Not a language tag proper, but it has to be recognized.
    2325           6 :         eRet = EXTRACTED_X_JOKER;
    2326             :     }
    2327      533789 :     else if (nHyph1 == 1 && rBcp47[0] == 'x')       // x-... privateuse
    2328             :     {
    2329             :         // x-... privateuse tags MUST be known to us by definition.
    2330          14 :         eRet = EXTRACTED_X;
    2331             :     }
    2332      533775 :     else if (nLen == 2 || nLen == 3)                // ll or lll
    2333             :     {
                               // A hyphen within 2 or 3 characters leaves no room for a
                               // language code; eRet then stays EXTRACTED_NONE.
    2334       81558 :         if (nHyph1 < 0)
    2335             :         {
    2336       40779 :             rLanguage = rBcp47.toAsciiLowerCase();
    2337       40779 :             rScript.clear();
    2338       40779 :             rCountry.clear();
    2339       40779 :             rVariants.clear();
    2340       40779 :             eRet = EXTRACTED_LSC;
    2341             :         }
    2342             :     }
    2343      492996 :     else if (  (nHyph1 == 2 && nLen == 5)           // ll-CC
    2344      201377 :             || (nHyph1 == 3 && nLen == 6))          // lll-CC
    2345             :     {
                               // Only valid if there is no second hyphen inside the CC part.
    2346      853136 :         if (nHyph2 < 0)
    2347             :         {
    2348      426568 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2349      426568 :             rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
    2350      426568 :             rScript.clear();
    2351      426568 :             rVariants.clear();
    2352      426568 :             eRet = EXTRACTED_LSC;
    2353             :         }
    2354             :     }
    2355       66428 :     else if (  (nHyph1 == 2 && nLen ==  7)          // ll-Ssss or ll-vvvv
    2356       45657 :             || (nHyph1 == 3 && nLen ==  8))         // lll-Ssss or lll-vvvv
    2357             :     {
    2358       25015 :         if (nHyph2 < 0)
    2359             :         {
                                   // The first character after the hyphen decides: a digit
                                   // means a four-character variant, anything else a script.
    2360       25015 :             sal_Unicode c = rBcp47[nHyph1+1];
    2361       25015 :             if ('0' <= c && c <= '9')
    2362             :             {
    2363             :                 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
    2364           1 :                 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2365           1 :                 rScript.clear();
    2366           1 :                 rCountry.clear();
    2367           1 :                 rVariants = rBcp47.copy( nHyph1 + 1);
    2368           1 :                 eRet = EXTRACTED_LV;
    2369             :             }
    2370             :             else
    2371             :             {
    2372       25014 :                 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2373       50028 :                 rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
    2374       75042 :                             rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2375       25014 :                 rCountry.clear();
    2376       25014 :                 rVariants.clear();
    2377       25014 :                 eRet = EXTRACTED_LSC;
    2378             :             }
    2379       25015 :         }
    2380             :     }
    2381       41413 :     else if (  (nHyph1 == 2 && nHyph2 == 7 && nLen == 10)   // ll-Ssss-CC
    2382       14720 :             || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11))  // lll-Ssss-CC
    2383             :     {
    2384       78020 :         if (nHyph3 < 0)
    2385             :         {
    2386       39010 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2387       39010 :             rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2388       39010 :             rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
    2389       39010 :             rVariants.clear();
    2390       39010 :             eRet = EXTRACTED_LSC;
    2391             :         }
    2392             :     }
    2393        2403 :     else if (  (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15)   // ll-Ssss-CC-vvvv[vvvv][-...]
    2394        2403 :             || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16))  // lll-Ssss-CC-vvvv[vvvv][-...]
    2395             :     {
                               // Without a fourth hyphen the variant runs to the end of the string.
    2396           0 :         if (nHyph4 < 0)
    2397           0 :             nHyph4 = rBcp47.getLength();
                               // The difference counts the hyphen too, so this accepts a first
                               // variant subtag of 4 to 8 characters.
    2398           0 :         if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
    2399             :         {
    2400           0 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2401           0 :             rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2402           0 :             rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
    2403           0 :             rVariants = rBcp47.copy( nHyph3 + 1);
    2404           0 :             eRet = EXTRACTED_LV;
    2405             :         }
    2406             :     }
    2407        2403 :     else if (  (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10)   // ll-CC-vvvv[vvvv][-...]
    2408        1113 :             || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11))  // lll-CC-vvvv[vvvv][-...]
    2409             :     {
                               // Without a third hyphen the variant runs to the end of the string.
    2410        1290 :         if (nHyph3 < 0)
    2411        1290 :             nHyph3 = rBcp47.getLength();
                               // Hyphen included in the difference: first variant subtag of 4 to 8 characters.
    2412        2580 :         if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
    2413             :         {
    2414        1290 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2415        1290 :             rScript.clear();
    2416        1290 :             rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
    2417        1290 :             rVariants = rBcp47.copy( nHyph2 + 1);
    2418        1290 :             eRet = EXTRACTED_LV;
    2419             :         }
    2420             :     }
    2421        1113 :     else if (  (nHyph1 == 2 && nLen >= 8)                   // ll-vvvvv[vvv][-...]
    2422          47 :             || (nHyph1 == 3 && nLen >= 9))                  // lll-vvvvv[vvv][-...]
    2423             :     {
                               // Without a second hyphen the variant runs to the end of the string.
    2424        1066 :         if (nHyph2 < 0)
    2425           5 :             nHyph2 = rBcp47.getLength();
                               // Hyphen included in the difference: first variant subtag of 5 to 8
                               // characters. Four-character variants were handled further above.
    2426        1066 :         if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
    2427             :         {
    2428           2 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2429           2 :             rScript.clear();
    2430           2 :             rCountry.clear();
    2431           2 :             rVariants = rBcp47.copy( nHyph1 + 1);
    2432           2 :             eRet = EXTRACTED_LV;
    2433             :         }
    2434             :         else
    2435             :         {
    2436             :             // Known and handled grandfathered; ugly but effective ...
    2437             :             // Note that nLen must have matched above.
    2438             :             // Strictly not a variant, but so far we treat it as such.
    2439        1064 :             if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
    2440             :             {
    2441        1061 :                 rLanguage = "en";
    2442        1061 :                 rScript.clear();
    2443        1061 :                 rCountry  = "GB";
    2444        1061 :                 rVariants = "oed";
    2445        1061 :                 eRet = EXTRACTED_LV;
    2446             :             }
    2447             :         }
    2448             :     }
                           // Nothing matched, or a shape matched but its inner check failed:
                           // hand back empty outputs so callers never see stale values.
    2449      533795 :     if (eRet == EXTRACTED_NONE)
    2450             :     {
    2451             :         SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
    2452          50 :         rLanguage.clear();
    2453          50 :         rScript.clear();
    2454          50 :         rCountry.clear();
    2455          50 :         rVariants.clear();
    2456             :     }
    2457      533795 :     return eRet;
    2458             : }
    2459             : 
    2460             : 
    2461             : // static
                       // Picks the entry of rList that best matches the tag string rReference.
                       //
                       // Returns an iterator to, in this order of preference:
                       //   1. the entry equal to rReference;
                       //   2. the first entry equal to one of the candidates tried in sequence:
                       //      the fallback strings of rReference, then en-US, en, x-default
                       //      and x-no-translate;
                       //   3. rList.begin() if none of the candidates is in the list.
                       // rList.end() is returned only for an empty list.
    2462       50428 : ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
    2463             :         const ::std::vector< OUString > & rList, const OUString & rReference )
    2464             : {
    2465       50428 :     if (rList.empty())
    2466           0 :         return rList.end();
    2467             : 
    2468       50428 :     ::std::vector< OUString >::const_iterator it;
    2469             : 
    2470             :     // Try the simple case first without constructing fallbacks.
    2471       69772 :     for (it = rList.begin(); it != rList.end(); ++it)
    2472             :     {
    2473       50428 :         if (*it == rReference)
    2474       31084 :             return it;  // exact match
    2475             :     }
    2476             : 
                           // Build the ordered candidate list: the fallbacks of the reference
                           // first, then the generic last resorts. A candidate equal to the
                           // reference is left out because the exact-match loop already failed.
    2477       19344 :     ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
    2478       19344 :     if (rReference != "en-US")
    2479             :     {
    2480           0 :         aFallbacks.push_back( "en-US");
    2481           0 :         if (rReference != "en")
    2482           0 :             aFallbacks.push_back( "en");
    2483             :     }
    2484       19344 :     if (rReference != "x-default")
    2485       19344 :         aFallbacks.push_back( "x-default");
    2486       19344 :     if (rReference != "x-no-translate")
    2487       19344 :         aFallbacks.push_back( "x-no-translate");
    2488             :     /* TODO: the original comphelper::Locale::getFallback() code had
    2489             :      * "x-notranslate" instead of "x-no-translate", but all .xcu files use
    2490             :      * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
    2491             :      * Did that ever work? Was it supposed to work at all like this? */
    2492             : 
                           // Candidate order takes precedence over list order: the outer loop
                           // walks the candidates, the inner loop searches the list for each.
    2493       75738 :     for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
    2494             :     {
    2495      113607 :         for (it = rList.begin(); it != rList.end(); ++it)
    2496             :         {
    2497       57213 :             if (*it == *fb)
    2498         819 :                 return it;  // fallback found
    2499             :         }
    2500             :     }
    2501             : 
    2502             :     // Did not find anything so return something of the list, the first value
    2503             :     // will do as well as any other as none did match any of the possible
    2504             :     // fallbacks.
    2505       18525 :     return rList.begin();
    2506             : }
    2507             : 
    2508             : 
    2509             : // static
                       // Picks the entry of rList that best matches the locale rReference.
                       //
                       // An entry whose Language, Country and Variant all equal those of
                       // rReference wins immediately. Otherwise the fallback strings of
                       // rReference are tried in order, and for each of them the list entries are
                       // scanned in order; the first entry whose own fallback strings contain the
                       // candidate is returned. Unlike getFallback() there is no last-resort
                       // default: rList.end() is returned for an empty list and when nothing
                       // matches.
    2510           0 : ::std::vector< com::sun::star::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
    2511             :         const ::std::vector< com::sun::star::lang::Locale > & rList,
    2512             :         const com::sun::star::lang::Locale & rReference )
    2513             : {
    2514           0 :     if (rList.empty())
    2515           0 :         return rList.end();
    2516             : 
    2517           0 :     ::std::vector< lang::Locale >::const_iterator it;
    2518             : 
    2519             :     // Try the simple case first without constructing fallbacks.
    2520           0 :     for (it = rList.begin(); it != rList.end(); ++it)
    2521             :     {
    2522           0 :         if (    (*it).Language == rReference.Language &&
    2523           0 :                 (*it).Country  == rReference.Country  &&
    2524           0 :                 (*it).Variant  == rReference.Variant)
    2525           0 :             return it;  // exact match
    2526             :     }
    2527             : 
    2528             :     // Now for each reference fallback test the fallbacks of the list in order.
                           // NOTE(review): the reference is expanded with getFallbackStrings( false)
                           // and the list entries with getFallbackStrings( true); presumably the
                           // flag controls whether the full tag itself is included - confirm
                           // against getFallbackStrings().
    2529           0 :     ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
                           // aListFallbacks[i] holds the fallback strings of the i-th list entry,
                           // computed once up front so the nested search below does not rebuild them.
    2530           0 :     ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
    2531           0 :     size_t i = 0;
    2532           0 :     for (it = rList.begin(); it != rList.end(); ++it, ++i)
    2533             :     {
    2534           0 :         ::std::vector< OUString > aTmp( LanguageTag( *it).getFallbackStrings( true));
    2535           0 :         aListFallbacks[i] = aTmp;
    2536           0 :     }
    2537           0 :     for (::std::vector< OUString >::const_iterator rfb( aFallbacks.begin()); rfb != aFallbacks.end(); ++rfb)
    2538             :     {
    2539           0 :         for (::std::vector< ::std::vector< OUString > >::const_iterator lfb( aListFallbacks.begin());
    2540           0 :                 lfb != aListFallbacks.end(); ++lfb)
    2541             :         {
    2542           0 :             for (::std::vector< OUString >::const_iterator fb( (*lfb).begin()); fb != (*lfb).end(); ++fb)
    2543             :             {
                                       // The offset within aListFallbacks is the index of the
                                       // matching entry in rList.
    2544           0 :                 if (*rfb == *fb)
    2545           0 :                     return rList.begin() + (lfb - aListFallbacks.begin());
    2546             :             }
    2547             :         }
    2548             :     }
    2549             : 
    2550             :     // No match found.
    2551           0 :     return rList.end();
    2552             : }
    2553             : 
    2554             : 
    2555         234 : static bool lcl_isSystem( LanguageType nLangID )
    2556             : {
                           // Tells whether nLangID stands for the system locale: either it is
                           // LANGUAGE_SYSTEM itself or MsLangId::getRealLanguage() maps it to a
                           // different ID.
    2557         234 :     if (nLangID == LANGUAGE_SYSTEM)
    2558         199 :         return true;
    2559             :     // There are some special values that simplify to SYSTEM,
    2560             :     // getRealLanguage() catches and resolves them.
    2561          35 :     LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
    2562          35 :     if (nNewLangID != nLangID)
    2563           0 :         return true;
    2564          35 :     return false;
    2565             : }
    2566             : 
    2567             : 
    2568             : // static
                       // Converts a Lang-ID to a Locale. If the system locale is not to be
                       // resolved and nLangID denotes it, an empty Locale is returned without
                       // constructing a LanguageTag.
    2569  1228346507 : com::sun::star::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
    2570             : {
    2571  1228346507 :     if (!bResolveSystem && lcl_isSystem( nLangID))
    2572         199 :         return lang::Locale();
    2573             : 
    2574  1228346308 :     return LanguageTag( nLangID).getLocale( bResolveSystem);
    2575             : }
    2576             : 
    2577             : 
    2578             : // static
                       // Converts a Locale to a Lang-ID. A Locale with an empty Language field
                       // is taken as the system locale: unless it is to be resolved,
                       // LANGUAGE_SYSTEM is returned without constructing a LanguageTag.
    2579  1214244301 : LanguageType LanguageTag::convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
    2580             : {
    2581  1214244301 :     if (rLocale.Language.isEmpty() && !bResolveSystem)
    2582       64408 :         return LANGUAGE_SYSTEM;
    2583             : 
    2584  1214179893 :     return LanguageTag( rLocale).getLanguageType( bResolveSystem);
    2585             : }
    2586             : 
    2587             : 
    2588             : // static
                       // Builds a BCP 47 string from the fields of rLocale, without resolving
                       // the system locale:
                       //   empty Language             -> empty string
                       //   Language I18NLANGTAG_QLT   -> the Variant field, returned as is
                       //   otherwise                  -> Language, or Language-Country if a
                       //                                 Country is set; Variant is ignored
    2589  1299899351 : OUString LanguageTagImpl::convertToBcp47( const com::sun::star::lang::Locale& rLocale )
    2590             : {
    2591  1299899351 :     OUString aBcp47;
    2592  1299899351 :     if (rLocale.Language.isEmpty())
    2593             :     {
    2594             :         // aBcp47 stays empty
    2595             :     }
    2596  1299899351 :     else if (rLocale.Language == I18NLANGTAG_QLT)
    2597             :     {
                               // For this special language code the Variant field is used as
                               // the complete tag.
    2598       44722 :         aBcp47 = rLocale.Variant;
    2599             :     }
    2600             :     else
    2601             :     {
    2602             :         /* XXX NOTE: most legacy code never evaluated the Variant field, so for
    2603             :          * now just concatenate language and country. In case we stumbled over
    2604             :          * variant aware code we'd have to take care of that. */
    2605  1299854629 :         if (rLocale.Country.isEmpty())
    2606   388900635 :             aBcp47 = rLocale.Language;
    2607             :         else
    2608             :         {
    2609   910953994 :             aBcp47 = rLocale.Language + "-" + rLocale.Country;
    2610             :         }
    2611             :     }
    2612  1299899351 :     return aBcp47;
    2613             : }
    2614             : 
    2615             : 
    2616             : // static
                       // Converts a Locale to a BCP 47 string. A Locale with an empty Language
                       // field yields the resolved system locale if bResolveSystem is set and an
                       // empty string otherwise; every other Locale is handed to
                       // LanguageTagImpl::convertToBcp47().
    2617       95661 : OUString LanguageTag::convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
    2618             : {
    2619       95661 :     OUString aBcp47;
    2620       95661 :     if (rLocale.Language.isEmpty())
    2621             :     {
    2622          57 :         if (bResolveSystem)
    2623           1 :             aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM, true);
    2624             :         // else aBcp47 stays empty
    2625             :     }
    2626             :     else
    2627             :     {
    2628       95604 :         aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
    2629             :     }
    2630       95661 :     return aBcp47;
    2631             : }
    2632             : 
    2633             : 
    2634             : // static
                       // Converts a Lang-ID to a BCP 47 string by way of a Locale. Returns an
                       // empty string if the system locale is not to be resolved and nLangID
                       // denotes it, and also if the system locale was to be resolved but the
                       // resulting Locale has no language.
    2635         302 : OUString LanguageTag::convertToBcp47( LanguageType nLangID, bool bResolveSystem )
    2636             : {
    2637             :     // Catch this first so we don't need the rest.
    2638         302 :     if (!bResolveSystem && lcl_isSystem( nLangID))
    2639           0 :         return OUString();
    2640             : 
    2641         302 :     lang::Locale aLocale( LanguageTag::convertToLocale( nLangID, bResolveSystem));
    2642             :     // If system for some reason (should not happen.. haha) could not be
    2643             :     // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
    2644             :     // would recurse into this method here!
    2645         302 :     if (aLocale.Language.isEmpty() && bResolveSystem)
    2646           0 :         return OUString();      // bad luck, bail out
                           // The Impl variant never resolves the system locale, so it is safe to
                           // call from here.
    2647         302 :     return LanguageTagImpl::convertToBcp47( aLocale);
    2648             : }
    2649             : 
    2650             : 
    2651             : // static
                       // Converts a BCP 47 string to a Locale. An empty string is taken as the
                       // system locale: unless it is to be resolved, an empty Locale is returned
                       // without constructing a LanguageTag.
    2652       41751 : com::sun::star::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
    2653             : {
    2654       41751 :     if (rBcp47.isEmpty() && !bResolveSystem)
    2655           0 :         return lang::Locale();
    2656             : 
    2657       41751 :     return LanguageTag( rBcp47).getLocale( bResolveSystem);
    2658             : }
    2659             : 
    2660             : 
    2661             : // static
                       // Converts a BCP 47 string to a Lang-ID. An empty string is taken as the
                       // system locale: unless it is to be resolved, LANGUAGE_SYSTEM is returned
                       // without constructing a LanguageTag.
    2662        1598 : LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47, bool bResolveSystem )
    2663             : {
    2664        1598 :     if (rBcp47.isEmpty() && !bResolveSystem)
    2665           0 :         return LANGUAGE_SYSTEM;
    2666             : 
    2667        1598 :     return LanguageTag( rBcp47).getLanguageType( bResolveSystem);
    2668             : }
    2669             : 
    2670             : 
    2671             : // static
                       // Converts a BCP 47 string to a Lang-ID after applying makeFallback() to
                       // the tag; the system locale is always resolved.
    2672        1315 : LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
    2673             : {
    2674        1315 :     return LanguageTag( rBcp47).makeFallback().getLanguageType( true);
    2675             : }
    2676             : 
    2677             : 
    2678             : // static
                       // Converts a BCP 47 string to a Locale after applying makeFallback() to
                       // the tag; the system locale is always resolved.
    2679           0 : com::sun::star::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
    2680             : {
    2681           0 :     return LanguageTag( rBcp47).makeFallback().getLocale( true);
    2682             : }
    2683             : 
    2684             : 
    2685             : // static
                       // Checks whether rString can be parsed and canonicalized as a language tag
                       // by the lt_tag_* functions.
                       //
                       // o_pCanonicalized, if not null, receives the canonicalized tag whenever
                       // canonicalization succeeded, even if the tag is then rejected because of
                       // bDisallowPrivate. It is left untouched if parsing or canonicalization
                       // failed.
                       // bDisallowPrivate additionally rejects tags that carry a privateuse part
                       // and tags whose language is I18NLANGTAG_QLT.
                       //
                       // Returns true if the tag is accepted, false otherwise.
    2686           9 : bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate )
    2687             : {
    2688           9 :     bool bValid = false;
    2689             : 
                           // Scope guard for the tag object: created in the constructor after
                           // theDataRef::get().init() has been called, released in the destructor
                           // on every return path.
    2690             :     struct guard
    2691             :     {
    2692             :         lt_tag_t* mpLangtag;
    2693           9 :         guard()
    2694             :         {
    2695           9 :             theDataRef::get().init();
    2696           9 :             mpLangtag = lt_tag_new();
    2697           9 :         }
    2698           9 :         ~guard()
    2699             :         {
    2700           9 :             lt_tag_unref( mpLangtag);
    2701           9 :         }
    2702           9 :     } aVar;
    2703             : 
    2704          18 :     myLtError aError;
    2705             : 
    2706           9 :     if (lt_tag_parse( aVar.mpLangtag, OUStringToOString( rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    2707             :     {
    2708           8 :         char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
    2709             :         SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
    2710           8 :         if (pTag)
    2711             :         {
    2712           8 :             bValid = true;
    2713           8 :             if (bDisallowPrivate)
    2714             :             {
                                       // A non-empty privateuse part disqualifies the tag.
    2715           3 :                 const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
    2716           3 :                 if (pPrivate && lt_string_length( pPrivate) > 0)
    2717           1 :                     bValid = false;
    2718             :                 else
    2719             :                 {
    2720           2 :                     const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
    2721           2 :                     if (pLangT)
    2722             :                     {
    2723           2 :                         const char* pLang = lt_lang_get_tag( pLangT);
    2724           2 :                         if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0)
    2725             :                         {
    2726             :                             // Disallow 'qlt' privateuse code to prevent
    2727             :                             // confusion with our internal usage.
    2728           0 :                             bValid = false;
    2729             :                         }
    2730             :                     }
    2731             :                 }
    2732             :             }
                                   // The canonical form is handed out regardless of bValid.
    2733           8 :             if (o_pCanonicalized)
    2734           8 :                 *o_pCanonicalized = OUString::createFromAscii( pTag);
                                   // pTag is owned by this function and released with free().
    2735           8 :             free( pTag);
    2736           8 :             return bValid;
    2737             :         }
    2738             :     }
    2739             :     else
    2740             :     {
    2741             :         SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
    2742             :     }
                           // Reached when parsing or canonicalization failed; bValid is still false.
    2743          10 :     return bValid;
    2744             : }
    2745             : 
    2746       10244 : LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
    2747             : {
    2748             :     //map the simple ones via LanguageTypes, and the hard ones explicitly
    2749       10244 :     LanguageType nLang(LANGUAGE_DONTKNOW);
    2750             : 
    2751       10244 :     switch (nLanguage)
    2752             :     {
    2753             :         case AppleLanguageId::ENGLISH:
    2754       10244 :             nLang = LANGUAGE_ENGLISH;
    2755       10244 :             break;
    2756             :         case AppleLanguageId::FRENCH:
    2757           0 :             nLang = LANGUAGE_FRENCH;
    2758           0 :             break;
    2759             :         case AppleLanguageId::GERMAN:
    2760           0 :             nLang = LANGUAGE_GERMAN;
    2761           0 :             break;
    2762             :         case AppleLanguageId::ITALIAN:
    2763           0 :             nLang = LANGUAGE_ITALIAN;
    2764           0 :             break;
    2765             :         case AppleLanguageId::DUTCH:
    2766           0 :             nLang = LANGUAGE_DUTCH;
    2767           0 :             break;
    2768             :         case AppleLanguageId::SWEDISH:
    2769           0 :             nLang = LANGUAGE_SWEDISH;
    2770           0 :             break;
    2771             :         case AppleLanguageId::SPANISH:
    2772           0 :             nLang = LANGUAGE_SPANISH;
    2773           0 :             break;
    2774             :         case AppleLanguageId::DANISH:
    2775           0 :             nLang = LANGUAGE_DANISH;
    2776           0 :             break;
    2777             :         case AppleLanguageId::PORTUGUESE:
    2778           0 :             nLang = LANGUAGE_PORTUGUESE;
    2779           0 :             break;
    2780             :         case AppleLanguageId::NORWEGIAN:
    2781           0 :             nLang = LANGUAGE_NORWEGIAN;
    2782           0 :             break;
    2783             :         case AppleLanguageId::HEBREW:
    2784           0 :             nLang = LANGUAGE_HEBREW;
    2785           0 :             break;
    2786             :         case AppleLanguageId::JAPANESE:
    2787           0 :             nLang = LANGUAGE_JAPANESE;
    2788           0 :             break;
    2789             :         case AppleLanguageId::ARABIC:
    2790           0 :             nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
    2791           0 :             break;
    2792             :         case AppleLanguageId::FINNISH:
    2793           0 :             nLang = LANGUAGE_FINNISH;
    2794           0 :             break;
    2795             :         case AppleLanguageId::GREEK:
    2796           0 :             nLang = LANGUAGE_GREEK;
    2797           0 :             break;
    2798             :         case AppleLanguageId::ICELANDIC:
    2799           0 :             nLang = LANGUAGE_ICELANDIC;
    2800           0 :             break;
    2801             :         case AppleLanguageId::MALTESE:
    2802           0 :             nLang = LANGUAGE_MALTESE;
    2803           0 :             break;
    2804             :         case AppleLanguageId::TURKISH:
    2805           0 :             nLang = LANGUAGE_TURKISH;
    2806           0 :             break;
    2807             :         case AppleLanguageId::CROATIAN:
    2808           0 :             nLang = LANGUAGE_CROATIAN;
    2809           0 :             break;
    2810             :         case AppleLanguageId::CHINESE_TRADITIONAL:
    2811           0 :             nLang = LANGUAGE_CHINESE_TRADITIONAL;
    2812           0 :             break;
    2813             :         case AppleLanguageId::URDU:
    2814           0 :             nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
    2815           0 :             break;
    2816             :         case AppleLanguageId::HINDI:
    2817           0 :             nLang = LANGUAGE_HINDI;
    2818           0 :             break;
    2819             :         case AppleLanguageId::THAI:
    2820           0 :             nLang = LANGUAGE_THAI;
    2821           0 :             break;
    2822             :         case AppleLanguageId::KOREAN:
    2823           0 :             nLang = LANGUAGE_KOREAN;
    2824           0 :             break;
    2825             :         case AppleLanguageId::LITHUANIAN:
    2826           0 :             nLang = LANGUAGE_LITHUANIAN;
    2827           0 :             break;
    2828             :         case AppleLanguageId::POLISH:
    2829           0 :             nLang = LANGUAGE_POLISH;
    2830           0 :             break;
    2831             :         case AppleLanguageId::HUNGARIAN:
    2832           0 :             nLang = LANGUAGE_HUNGARIAN;
    2833           0 :             break;
    2834             :         case AppleLanguageId::ESTONIAN:
    2835           0 :             nLang = LANGUAGE_ESTONIAN;
    2836           0 :             break;
    2837             :         case AppleLanguageId::LATVIAN:
    2838           0 :             nLang = LANGUAGE_LATVIAN;
    2839           0 :             break;
    2840             :         case AppleLanguageId::SAMI:
    2841           0 :             nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
    2842           0 :             break;
    2843             :         case AppleLanguageId::FAROESE:
    2844           0 :             nLang = LANGUAGE_FAEROESE;
    2845           0 :             break;
    2846             :         case AppleLanguageId::FARSI:
    2847           0 :             nLang = LANGUAGE_FARSI;
    2848           0 :             break;
    2849             :         case AppleLanguageId::RUSSIAN:
    2850           0 :             nLang = LANGUAGE_RUSSIAN;
    2851           0 :             break;
    2852             :         case AppleLanguageId::CHINESE_SIMPLIFIED:
    2853           0 :             nLang = LANGUAGE_CHINESE_SIMPLIFIED;
    2854           0 :             break;
    2855             :         case AppleLanguageId::FLEMISH:
    2856           0 :             nLang = LANGUAGE_DUTCH_BELGIAN;
    2857           0 :             break;
    2858             :         case AppleLanguageId::IRISH_GAELIC:
    2859           0 :             nLang = LANGUAGE_GAELIC_IRELAND;
    2860           0 :             break;
    2861             :         case AppleLanguageId::ALBANIAN:
    2862           0 :             nLang = LANGUAGE_ALBANIAN;
    2863           0 :             break;
    2864             :         case AppleLanguageId::ROMANIAN:
    2865           0 :             nLang = LANGUAGE_ROMANIAN;
    2866           0 :             break;
    2867             :         case AppleLanguageId::CZECH:
    2868           0 :             nLang = LANGUAGE_CZECH;
    2869           0 :             break;
    2870             :         case AppleLanguageId::SLOVAK:
    2871           0 :             nLang = LANGUAGE_SLOVAK;
    2872           0 :             break;
    2873             :         case AppleLanguageId::SLOVENIAN:
    2874           0 :             nLang = LANGUAGE_SLOVENIAN;
    2875           0 :             break;
    2876             :         case AppleLanguageId::YIDDISH:
    2877           0 :             nLang = LANGUAGE_YIDDISH;
    2878           0 :             break;
    2879             :         case AppleLanguageId::SERBIAN:
    2880           0 :             nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA;   //maybe
    2881           0 :             break;
    2882             :         case AppleLanguageId::MACEDONIAN:
    2883           0 :             nLang = LANGUAGE_MACEDONIAN;
    2884           0 :             break;
    2885             :         case AppleLanguageId::BULGARIAN:
    2886           0 :             nLang = LANGUAGE_BULGARIAN;
    2887           0 :             break;
    2888             :         case AppleLanguageId::UKRAINIAN:
    2889           0 :             nLang = LANGUAGE_UKRAINIAN;
    2890           0 :             break;
    2891             :         case AppleLanguageId::BYELORUSSIAN:
    2892           0 :             nLang = LANGUAGE_BELARUSIAN;
    2893           0 :             break;
    2894             :         case AppleLanguageId::UZBEK:
    2895           0 :             nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
    2896           0 :             break;
    2897             :         case AppleLanguageId::KAZAKH:
    2898           0 :             nLang = LANGUAGE_KAZAKH;
    2899           0 :             break;
    2900             :         case AppleLanguageId::AZERI_CYRILLIC:
    2901           0 :             nLang = LANGUAGE_AZERI_CYRILLIC;
    2902           0 :             break;
    2903             :         case AppleLanguageId::AZERI_ARABIC:
    2904           0 :             return LanguageTag("az-Arab");
    2905             :             break;
    2906             :         case AppleLanguageId::ARMENIAN:
    2907           0 :             nLang = LANGUAGE_ARMENIAN;
    2908           0 :             break;
    2909             :         case AppleLanguageId::GEORGIAN:
    2910           0 :             nLang = LANGUAGE_GEORGIAN;
    2911           0 :             break;
    2912             :         case AppleLanguageId::MOLDAVIAN:
    2913           0 :             nLang = LANGUAGE_ROMANIAN_MOLDOVA;
    2914           0 :             break;
    2915             :         case AppleLanguageId::KIRGHIZ:
    2916           0 :             nLang = LANGUAGE_KIRGHIZ;
    2917           0 :             break;
    2918             :         case AppleLanguageId::TAJIKI:
    2919           0 :             nLang = LANGUAGE_TAJIK;
    2920           0 :             break;
    2921             :         case AppleLanguageId::TURKMEN:
    2922           0 :             nLang = LANGUAGE_TURKMEN;
    2923           0 :             break;
    2924             :         case AppleLanguageId::MONGOLIAN_MONGOLIAN:
    2925           0 :             nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
    2926           0 :             break;
    2927             :         case AppleLanguageId::MONGOLIAN_CYRILLIC:
    2928           0 :             nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
    2929           0 :             break;
    2930             :         case AppleLanguageId::PASHTO:
    2931           0 :             nLang = LANGUAGE_PASHTO;
    2932           0 :             break;
    2933             :         case AppleLanguageId::KURDISH:
    2934           0 :             nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
    2935           0 :             break;
    2936             :         case AppleLanguageId::KASHMIRI:
    2937           0 :             nLang = LANGUAGE_KASHMIRI;
    2938           0 :             break;
    2939             :         case AppleLanguageId::SINDHI:
    2940           0 :             nLang = LANGUAGE_SINDHI;
    2941           0 :             break;
    2942             :         case AppleLanguageId::TIBETAN:
    2943           0 :             nLang = LANGUAGE_TIBETAN;
    2944           0 :             break;
    2945             :         case AppleLanguageId::NEPALI:
    2946           0 :             nLang = LANGUAGE_NEPALI;
    2947           0 :             break;
    2948             :         case AppleLanguageId::SANSKRIT:
    2949           0 :             nLang = LANGUAGE_SANSKRIT;
    2950           0 :             break;
    2951             :         case AppleLanguageId::MARATHI:
    2952           0 :             nLang = LANGUAGE_MARATHI;
    2953           0 :             break;
    2954             :         case AppleLanguageId::BENGALI:
    2955           0 :             nLang = LANGUAGE_BENGALI;
    2956           0 :             break;
    2957             :         case AppleLanguageId::ASSAMESE:
    2958           0 :             nLang = LANGUAGE_ASSAMESE;
    2959           0 :             break;
    2960             :         case AppleLanguageId::GUJARATI:
    2961           0 :             nLang = LANGUAGE_GUJARATI;
    2962           0 :             break;
    2963             :         case AppleLanguageId::PUNJABI:
    2964           0 :             nLang = LANGUAGE_PUNJABI;
    2965           0 :             break;
    2966             :         case AppleLanguageId::ORIYA:
    2967           0 :             nLang = LANGUAGE_ODIA;
    2968           0 :             break;
    2969             :         case AppleLanguageId::MALAYALAM:
    2970           0 :             nLang = LANGUAGE_MALAYALAM;
    2971           0 :             break;
    2972             :         case AppleLanguageId::KANNADA:
    2973           0 :             nLang = LANGUAGE_KANNADA;
    2974           0 :             break;
    2975             :         case AppleLanguageId::TAMIL:
    2976           0 :             nLang = LANGUAGE_TAMIL;
    2977           0 :             break;
    2978             :         case AppleLanguageId::TELUGU:
    2979           0 :             nLang = LANGUAGE_TELUGU;
    2980           0 :             break;
    2981             :         case AppleLanguageId::SINHALESE:
    2982           0 :             nLang = LANGUAGE_SINHALESE_SRI_LANKA;
    2983           0 :             break;
    2984             :         case AppleLanguageId::BURMESE:
    2985           0 :             nLang = LANGUAGE_BURMESE;
    2986           0 :             break;
    2987             :         case AppleLanguageId::KHMER:
    2988           0 :             nLang = LANGUAGE_KHMER;
    2989           0 :             break;
    2990             :         case AppleLanguageId::LAO:
    2991           0 :             nLang = LANGUAGE_LAO;
    2992           0 :             break;
    2993             :         case AppleLanguageId::VIETNAMESE:
    2994           0 :             nLang = LANGUAGE_VIETNAMESE;
    2995           0 :             break;
    2996             :         case AppleLanguageId::INDONESIAN:
    2997           0 :             nLang = LANGUAGE_INDONESIAN;
    2998           0 :             break;
    2999             :         case AppleLanguageId::TAGALONG:
    3000           0 :             nLang = LANGUAGE_USER_TAGALOG;
    3001           0 :             break;
    3002             :         case AppleLanguageId::MALAY_LATIN:
    3003           0 :             nLang = LANGUAGE_MALAY_MALAYSIA;
    3004           0 :             break;
    3005             :         case AppleLanguageId::MALAY_ARABIC:
    3006           0 :             return LanguageTag("ms-Arab");
    3007             :             break;
    3008             :         case AppleLanguageId::AMHARIC:
    3009           0 :             nLang = LANGUAGE_AMHARIC_ETHIOPIA;
    3010           0 :             break;
    3011             :         case AppleLanguageId::TIGRINYA:
    3012           0 :             nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
    3013           0 :             break;
    3014             :         case AppleLanguageId::GALLA:
    3015           0 :             nLang = LANGUAGE_OROMO;
    3016           0 :             break;
    3017             :         case AppleLanguageId::SOMALI:
    3018           0 :             nLang = LANGUAGE_SOMALI;
    3019           0 :             break;
    3020             :         case AppleLanguageId::SWAHILI:
    3021           0 :             nLang = LANGUAGE_SWAHILI;
    3022           0 :             break;
    3023             :         case AppleLanguageId::KINYARWANDA:
    3024           0 :             nLang = LANGUAGE_KINYARWANDA_RWANDA;
    3025           0 :             break;
    3026             :         case AppleLanguageId::RUNDI:
    3027           0 :             return LanguageTag("rn");
    3028             :             break;
    3029             :         case AppleLanguageId::NYANJA:
    3030           0 :             nLang = LANGUAGE_USER_NYANJA;
    3031           0 :             break;
    3032             :         case AppleLanguageId::MALAGASY:
    3033           0 :             nLang = LANGUAGE_MALAGASY_PLATEAU;
    3034           0 :             break;
    3035             :         case AppleLanguageId::ESPERANTO:
    3036           0 :             nLang = LANGUAGE_USER_ESPERANTO;
    3037           0 :             break;
    3038             :         case AppleLanguageId::WELSH:
    3039           0 :             nLang = LANGUAGE_WELSH;
    3040           0 :             break;
    3041             :         case AppleLanguageId::BASQUE:
    3042           0 :             nLang = LANGUAGE_BASQUE;
    3043           0 :             break;
    3044             :         case AppleLanguageId::CATALAN:
    3045           0 :             nLang = LANGUAGE_CATALAN;
    3046           0 :             break;
    3047             :         case AppleLanguageId::LATIN:
    3048           0 :             nLang = LANGUAGE_USER_LATIN;
    3049           0 :             break;
    3050             :         case AppleLanguageId::QUENCHUA:
    3051           0 :             nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
    3052           0 :             break;
    3053             :         case AppleLanguageId::GUARANI:
    3054           0 :             nLang = LANGUAGE_GUARANI_PARAGUAY;
    3055           0 :             break;
    3056             :         case AppleLanguageId::AYMARA:
    3057           0 :             return LanguageTag("ay");
    3058             :             break;
    3059             :         case AppleLanguageId::TATAR:
    3060           0 :             nLang = LANGUAGE_TATAR;
    3061           0 :             break;
    3062             :         case AppleLanguageId::UIGHUR:
    3063           0 :             nLang = LANGUAGE_UIGHUR_CHINA;
    3064           0 :             break;
    3065             :         case AppleLanguageId::DZONGKHA:
    3066           0 :             nLang = LANGUAGE_DZONGKHA;
    3067           0 :             break;
    3068             :         case AppleLanguageId::JAVANESE_LATIN:
    3069           0 :             return LanguageTag("jv-Latn");
    3070             :             break;
    3071             :         case AppleLanguageId::SUNDANESE_LATIN:
    3072           0 :             return LanguageTag("su-Latn");
    3073             :             break;
    3074             :         case AppleLanguageId::GALICIAN:
    3075           0 :             nLang = LANGUAGE_GALICIAN;
    3076           0 :             break;
    3077             :         case AppleLanguageId::AFRIKAANS:
    3078           0 :             nLang = LANGUAGE_AFRIKAANS;
    3079           0 :             break;
    3080             :         case AppleLanguageId::BRETON:
    3081           0 :             nLang = LANGUAGE_BRETON_FRANCE;
    3082           0 :             break;
    3083             :         case AppleLanguageId::INUKTITUT:
    3084           0 :             nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
    3085           0 :             break;
    3086             :         case AppleLanguageId::SCOTTISH_GAELIC:
    3087           0 :             nLang = LANGUAGE_GAELIC_SCOTLAND;
    3088           0 :             break;
    3089             :         case AppleLanguageId::MANX_GAELIC:
    3090           0 :             nLang = LANGUAGE_USER_MANX;
    3091           0 :             break;
    3092             :         case AppleLanguageId::IRISH_GAELIC_WITH_DOT_ABOVE:
    3093           0 :             return LanguageTag("ga-Latg");
    3094             :             break;
    3095             :         case AppleLanguageId::TONGAN:
    3096           0 :             return LanguageTag("to");
    3097             :             break;
    3098             :         case AppleLanguageId::GREEK_POLYTONIC:
    3099           0 :             nLang = LANGUAGE_USER_ANCIENT_GREEK;
    3100           0 :             break;
    3101             :         case AppleLanguageId::GREENLANDIC:
    3102           0 :             nLang = LANGUAGE_KALAALLISUT_GREENLAND;
    3103           0 :             break;
    3104             :         case AppleLanguageId::AZERI_LATIN:
    3105           0 :             nLang = LANGUAGE_AZERI_LATIN;
    3106           0 :             break;
    3107             :     }
    3108             : 
    3109       10244 :     return LanguageTag(nLang);
    3110             : }
    3111             : 
    3112             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11