LCOV - code coverage report
Current view: top level - i18nlangtag/source/languagetag - languagetag.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 943 1412 66.8 %
Date: 2014-11-03 Functions: 97 109 89.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <config_folders.h>
      11             : 
      12             : #include "i18nlangtag/languagetag.hxx"
      13             : #include "i18nlangtag/applelangid.hxx"
      14             : #include "i18nlangtag/mslangid.hxx"
      15             : #include <rtl/ustrbuf.hxx>
      16             : #include <rtl/bootstrap.hxx>
      17             : #include <osl/file.hxx>
      18             : #include <rtl/instance.hxx>
      19             : #include <rtl/locale.h>
      20             : #include <boost/unordered_set.hpp>
      21             : #include <map>
      22             : 
      23             : //#define erDEBUG
      24             : 
      25             : #if defined(ENABLE_LIBLANGTAG)
      26             : #include <liblangtag/langtag.h>
      27             : #else
      28             : /* Replacement code for LGPL phobic and Android systems.
      29             :  * For iOS we could probably use NSLocale instead, that should have more or
      30             :  * less required functionality. If it is good enough, it could be used for Mac
      31             :  * OS X, too.
      32             :  */
      33             : #include "simple-langtag.cxx"
      34             : #endif
      35             : 
      36             : using namespace com::sun::star;
      37             : 
      38             : 
      39             : // Helper to ensure lt_error_t is free'd
      40             : struct myLtError
      41             : {
      42             :     lt_error_t* p;
      43          62 :     myLtError() : p(NULL) {}
      44          62 :     ~myLtError() { if (p) lt_error_unref( p); }
      45             : };
      46             : 
      47             : // "statics" to be returned as const reference to an empty locale and string.
      48             : namespace {
      49             : struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
      50             : struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
      51             : }
      52             : 
      53             : typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet;
      54             : namespace {
      55             : struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
      56             : struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
      57             : }
      58             : 
      59          28 : static const KnownTagSet & getKnowns()
      60             : {
      61          28 :     KnownTagSet & rKnowns = theKnowns::get();
      62          28 :     if (rKnowns.empty())
      63             :     {
      64          22 :         osl::MutexGuard aGuard( theMutex::get());
      65          22 :         if (rKnowns.empty())
      66             :         {
      67          22 :             ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
      68       36564 :             for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
      69       24376 :                     it != aDefined.end(); ++it)
      70             :             {
      71             :                 // Do not use the BCP47 string here to initialize the
      72             :                 // LanguageTag because then canonicalize() would call this
      73             :                 // getKnowns() again..
      74       12166 :                 ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings( true));
      75       40194 :                 for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
      76             :                 {
      77       28028 :                     rKnowns.insert( *fb);
      78             :                 }
      79       12188 :             }
      80          22 :         }
      81             :     }
      82          28 :     return rKnowns;
      83             : }
      84             : 
      85             : 
      86             : namespace {
      87             : struct compareIgnoreAsciiCaseLess
      88             : {
      89     8479942 :     bool operator()( const OUString& r1, const OUString& r2 ) const
      90             :     {
      91     8479942 :         return r1.compareToIgnoreAsciiCase( r2) < 0;
      92             :     }
      93             : };
      94             : typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
      95             : typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
      96             : struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
      97             : struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
      98             : struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
      99             : struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
     100             : }
     101             : 
     102             : 
     103         254 : static LanguageType getNextOnTheFlyLanguage()
     104             : {
     105             :     static LanguageType nOnTheFlyLanguage = 0;
     106         254 :     osl::MutexGuard aGuard( theMutex::get());
     107         254 :     if (!nOnTheFlyLanguage)
     108          66 :         nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
     109             :     else
     110             :     {
     111         188 :         if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
     112         188 :             ++nOnTheFlyLanguage;
     113             :         else
     114             :         {
     115           0 :             LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
     116           0 :             if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
     117           0 :                 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
     118             :             else
     119             :             {
     120             :                 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
     121             :                         << ((LANGUAGE_ON_THE_FLY_END - LANGUAGE_ON_THE_FLY_START + 1)
     122             :                             * (LANGUAGE_ON_THE_FLY_SUB_END - LANGUAGE_ON_THE_FLY_SUB_START + 1))
     123             :                         << " consumed?!?)");
     124           0 :                 return 0;
     125             :             }
     126             :         }
     127             :     }
     128             : #if OSL_DEBUG_LEVEL > 0
     129             :     static size_t nOnTheFlies = 0;
     130             :     ++nOnTheFlies;
     131             :     SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
     132             : #endif
     133         254 :     return nOnTheFlyLanguage;
     134             : }
     135             : 
     136             : 
     137             : // static
     138      224675 : bool LanguageTag::isOnTheFlyID( LanguageType nLang )
     139             : {
     140      224675 :     LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
     141      224675 :     LanguageType nSub = MsLangId::getSubLanguage( nLang);
     142             :     return
     143        1110 :         LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
     144      225785 :         LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
     145             : }
     146             : 
     147             : 
     148             : /** A reference holder for liblangtag data de/initialization, one static
     149             :     instance. Currently implemented such that the first "ref" inits and dtor
     150             :     (our library deinitialized) tears down.
     151             : */
     152             : class LiblantagDataRef
     153             : {
     154             : public:
     155             :     LiblantagDataRef();
     156             :     ~LiblantagDataRef();
     157          58 :     inline void incRef()
     158             :     {
     159          58 :         if (mnRef != SAL_MAX_UINT32 && !mnRef++)
     160          22 :             setup();
     161          58 :     }
     162          80 :     inline void decRef()
     163             :     {
     164          80 :         if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
     165          22 :             teardown();
     166          80 :     }
     167             : private:
     168             :     OString maDataPath;   // path to liblangtag data, "|" if system
     169             :     sal_uInt32   mnRef;
     170             : 
     171             :     void setupDataPath();
     172             :     void setup();
     173             :     void teardown();
     174             : };
     175             : 
     176             : namespace {
     177             : struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
     178             : }
     179             : 
     180          22 : LiblantagDataRef::LiblantagDataRef()
     181             :     :
     182          22 :         mnRef(0)
     183             : {
     184          22 : }
     185             : 
     186          44 : LiblantagDataRef::~LiblantagDataRef()
     187             : {
     188             :     // When destructed we're tearing down unconditionally.
     189          22 :     if (mnRef)
     190          22 :         mnRef = 1;
     191          22 :     decRef();
     192          22 : }
     193             : 
     194          22 : void LiblantagDataRef::setup()
     195             : {
     196             :     SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
     197          22 :     if (maDataPath.isEmpty())
     198          22 :         setupDataPath();
     199          22 :     lt_db_initialize();
     200             :     // Hold ref eternally.
     201          22 :     mnRef = SAL_MAX_UINT32;
     202          22 : }
     203             : 
     204          22 : void LiblantagDataRef::teardown()
     205             : {
     206             :     SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
     207          22 :     lt_db_finalize();
     208          22 : }
     209             : 
     210          22 : void LiblantagDataRef::setupDataPath()
     211             : {
     212             :     // maDataPath is assumed to be empty here.
     213          22 :     OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
     214          22 :     rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
     215             : 
     216             :     // Check if data is in our own installation, else assume system
     217             :     // installation.
     218          44 :     OUString aData( aURL);
     219          22 :     aData += "/language-subtag-registry.xml";
     220          44 :     osl::DirectoryItem aDirItem;
     221          22 :     if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
     222             :     {
     223           0 :         OUString aPath;
     224           0 :         if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
     225           0 :             maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
     226             :     }
     227          22 :     if (maDataPath.isEmpty())
     228          22 :         maDataPath = "|";   // assume system
     229             :     else
     230          22 :         lt_db_set_datadir( maDataPath.getStr());
     231          22 : }
     232             : 
     233             : 
     234             : /* TODO: we could transform known vendor and browser-specific variants to known
     235             :  * BCP 47 if available. For now just remove them to not confuse any later
     236             :  * treatments that check for empty variants. This vendor stuff was never
     237             :  * supported anyway. */
     238      734994 : static void handleVendorVariant( com::sun::star::lang::Locale & rLocale )
     239             : {
     240      734994 :     if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
     241          14 :         rLocale.Variant = OUString();
     242      734994 : }
     243             : 
     244             : 
     245             : class LanguageTagImpl
     246             : {
     247             : public:
     248             : 
     249             :     explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
     250             :     explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
     251             :     ~LanguageTagImpl();
     252             :     LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
     253             : 
     254             : private:
     255             : 
     256             :     friend class LanguageTag;
     257             : 
     258             :     enum Decision
     259             :     {
     260             :         DECISION_DONTKNOW,
     261             :         DECISION_NO,
     262             :         DECISION_YES
     263             :     };
     264             : 
     265             :     mutable com::sun::star::lang::Locale    maLocale;
     266             :     mutable OUString                        maBcp47;
     267             :     mutable OUString                        maCachedLanguage;   ///< cache getLanguage()
     268             :     mutable OUString                        maCachedScript;     ///< cache getScript()
     269             :     mutable OUString                        maCachedCountry;    ///< cache getCountry()
     270             :     mutable OUString                        maCachedVariants;   ///< cache getVariants()
     271             :     mutable lt_tag_t*                       mpImplLangtag;      ///< liblangtag pointer
     272             :     mutable LanguageType                    mnLangID;
     273             :     mutable Decision                        meIsValid;
     274             :     mutable Decision                        meIsIsoLocale;
     275             :     mutable Decision                        meIsIsoODF;
     276             :     mutable Decision                        meIsLiblangtagNeeded;   ///< whether processing with liblangtag needed
     277             :             bool                            mbSystemLocale      : 1;
     278             :     mutable bool                            mbInitializedBcp47  : 1;
     279             :     mutable bool                            mbInitializedLocale : 1;
     280             :     mutable bool                            mbInitializedLangID : 1;
     281             :     mutable bool                            mbCachedLanguage    : 1;
     282             :     mutable bool                            mbCachedScript      : 1;
     283             :     mutable bool                            mbCachedCountry     : 1;
     284             :     mutable bool                            mbCachedVariants    : 1;
     285             : 
     286             :     const OUString &    getBcp47() const;
     287             :     OUString            getLanguage() const;
     288             :     OUString            getScript() const;
     289             :     OUString            getCountry() const;
     290             :     OUString            getRegion() const;
     291             :     OUString            getVariants() const;
     292             :     bool                hasScript() const;
     293             : 
     294             :     bool                isIsoLocale() const;
     295             :     bool                isIsoODF() const;
     296             :     bool                isValidBcp47() const;
     297             : 
     298             :     void                convertLocaleToBcp47();
     299             :     void                convertLocaleToLang( bool bAllowOnTheFlyID );
     300             :     void                convertBcp47ToLocale();
     301             :     void                convertBcp47ToLang();
     302             :     void                convertLangToLocale();
     303             :     void                convertLangToBcp47();
     304             : 
     305             :     /** @return whether BCP 47 language tag string was changed. */
     306             :     bool                canonicalize();
     307             : 
     308             :     /** Canonicalize if not yet done and synchronize initialized conversions.
     309             : 
     310             :         @return whether BCP 47 language tag string was changed.
     311             :      */
     312             :     bool                synCanonicalize();
     313             : 
     314             :     OUString            getLanguageFromLangtag();
     315             :     OUString            getScriptFromLangtag();
     316             :     OUString            getRegionFromLangtag();
     317             :     OUString            getVariantsFromLangtag();
     318             : 
     319             :     /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
     320             : 
     321             :         @param  nRegisterID
     322             :                 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
     323             :                 instead of generating an on-the-fly ID. Implementation may
     324             :                 still generate an ID if the suggested ID is already used for
     325             :                 another language tag.
     326             : 
     327             :         @return NULL if no ID could be obtained or registration failed.
     328             :      */
     329             :     LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
     330             : 
     331             :     /** Obtain Language, Script, Country and Variants via simpleExtract() and
     332             :         assign them to the cached variables if successful.
     333             : 
     334             :         @return return of simpleExtract()
     335             :      */
     336             :     bool                cacheSimpleLSCV();
     337             : 
     338             :     enum Extraction
     339             :     {
     340             :         EXTRACTED_NONE,
     341             :         EXTRACTED_LSC,
     342             :         EXTRACTED_LV,
     343             :         EXTRACTED_X,
     344             :         EXTRACTED_X_JOKER
     345             :     };
     346             : 
     347             :     /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
     348             :         portions.
     349             : 
     350             :         Does not check case or content!
     351             : 
     352             :         @return EXTRACTED_LSC if simple tag was detected (i.e. one that
     353             :                 would fulfill the isIsoODF() condition),
     354             :                 EXTRACTED_LV if a tag with variant was detected,
     355             :                 EXTRACTED_X if x-... privateuse tag was detected,
     356             :                 EXTRACTED_X_JOKER if "*" joker was detected,
     357             :                 EXTRACTED_NONE else.
     358             :      */
     359             :     static Extraction   simpleExtract( const OUString& rBcp47,
     360             :                                        OUString& rLanguage,
     361             :                                        OUString& rScript,
     362             :                                        OUString& rCountry,
     363             :                                        OUString& rVariants );
     364             : 
     365             :     /** Convert Locale to BCP 47 string without resolving system and creating
     366             :         temporary LanguageTag instances. */
     367             :     static OUString     convertToBcp47( const com::sun::star::lang::Locale& rLocale );
     368             : };
     369             : 
     370             : 
     371      251874 : LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
     372             :     :
     373             :         maLocale( rLanguageTag.maLocale),
     374             :         maBcp47( rLanguageTag.maBcp47),
     375             :         mpImplLangtag( NULL),
     376             :         mnLangID( rLanguageTag.mnLangID),
     377             :         meIsValid( DECISION_DONTKNOW),
     378             :         meIsIsoLocale( DECISION_DONTKNOW),
     379             :         meIsIsoODF( DECISION_DONTKNOW),
     380             :         meIsLiblangtagNeeded( DECISION_DONTKNOW),
     381             :         mbSystemLocale( rLanguageTag.mbSystemLocale),
     382             :         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
     383             :         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
     384             :         mbInitializedLangID( rLanguageTag.mbInitializedLangID),
     385             :         mbCachedLanguage( false),
     386             :         mbCachedScript( false),
     387             :         mbCachedCountry( false),
     388      251874 :         mbCachedVariants( false)
     389             : {
     390      251874 : }
     391             : 
     392             : 
     393           0 : LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
     394             :     :
     395             :         maLocale( rLanguageTagImpl.maLocale),
     396             :         maBcp47( rLanguageTagImpl.maBcp47),
     397             :         maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
     398             :         maCachedScript( rLanguageTagImpl.maCachedScript),
     399             :         maCachedCountry( rLanguageTagImpl.maCachedCountry),
     400             :         maCachedVariants( rLanguageTagImpl.maCachedVariants),
     401             :         mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
     402           0 :                 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL),
     403             :         mnLangID( rLanguageTagImpl.mnLangID),
     404             :         meIsValid( rLanguageTagImpl.meIsValid),
     405             :         meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
     406             :         meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
     407             :         meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
     408             :         mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
     409             :         mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
     410             :         mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
     411             :         mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
     412             :         mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
     413             :         mbCachedScript( rLanguageTagImpl.mbCachedScript),
     414             :         mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
     415           0 :         mbCachedVariants( rLanguageTagImpl.mbCachedVariants)
     416             : {
     417           0 :     if (mpImplLangtag)
     418           0 :         theDataRef::get().incRef();
     419           0 : }
     420             : 
     421             : 
     422           0 : LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
     423             : {
     424           0 :     if (&rLanguageTagImpl == this)
     425           0 :         return *this;
     426             : 
     427           0 :     maLocale            = rLanguageTagImpl.maLocale;
     428           0 :     maBcp47             = rLanguageTagImpl.maBcp47;
     429           0 :     maCachedLanguage    = rLanguageTagImpl.maCachedLanguage;
     430           0 :     maCachedScript      = rLanguageTagImpl.maCachedScript;
     431           0 :     maCachedCountry     = rLanguageTagImpl.maCachedCountry;
     432           0 :     maCachedVariants    = rLanguageTagImpl.maCachedVariants;
     433           0 :     lt_tag_t * oldTag = mpImplLangtag;
     434             :     mpImplLangtag       = rLanguageTagImpl.mpImplLangtag ?
     435           0 :                             lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL;
     436           0 :     lt_tag_unref(oldTag);
     437           0 :     mnLangID            = rLanguageTagImpl.mnLangID;
     438           0 :     meIsValid           = rLanguageTagImpl.meIsValid;
     439           0 :     meIsIsoLocale       = rLanguageTagImpl.meIsIsoLocale;
     440           0 :     meIsIsoODF          = rLanguageTagImpl.meIsIsoODF;
     441           0 :     meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
     442           0 :     mbSystemLocale      = rLanguageTagImpl.mbSystemLocale;
     443           0 :     mbInitializedBcp47  = rLanguageTagImpl.mbInitializedBcp47;
     444           0 :     mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
     445           0 :     mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
     446           0 :     mbCachedLanguage    = rLanguageTagImpl.mbCachedLanguage;
     447           0 :     mbCachedScript      = rLanguageTagImpl.mbCachedScript;
     448           0 :     mbCachedCountry     = rLanguageTagImpl.mbCachedCountry;
     449           0 :     mbCachedVariants    = rLanguageTagImpl.mbCachedVariants;
     450           0 :     if (mpImplLangtag && !oldTag)
     451           0 :         theDataRef::get().incRef();
     452           0 :     else if (!mpImplLangtag && oldTag)
     453           0 :         theDataRef::get().decRef();
     454           0 :     return *this;
     455             : }
     456             : 
     457             : 
     458      503434 : LanguageTagImpl::~LanguageTagImpl()
     459             : {
     460      251717 :     if (mpImplLangtag)
     461             :     {
     462          40 :         lt_tag_unref( mpImplLangtag);
     463          40 :         theDataRef::get().decRef();
     464             :     }
     465      251717 : }
     466             : 
     467             : 
     468      416924 : LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
     469             :     :
     470             :         maBcp47( rBcp47LanguageTag),
     471             :         mnLangID( LANGUAGE_DONTKNOW),
     472      416924 :         mbSystemLocale( rBcp47LanguageTag.isEmpty()),
     473      416924 :         mbInitializedBcp47( !mbSystemLocale),
     474             :         mbInitializedLocale( false),
     475             :         mbInitializedLangID( false),
     476     1250772 :         mbIsFallback( false)
     477             : {
     478      416924 :     if (bCanonicalize)
     479             :     {
     480       15603 :         getImpl()->canonicalize();
     481             :         // Registration itself may already have canonicalized, so do an
     482             :         // unconditional sync.
     483       15603 :         syncFromImpl();
     484             :     }
     485             : 
     486      416924 : }
     487             : 
     488             : 
     489      734328 : LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
     490             :     :
     491             :         maLocale( rLocale),
     492             :         mnLangID( LANGUAGE_DONTKNOW),
     493      734328 :         mbSystemLocale( rLocale.Language.isEmpty()),
     494             :         mbInitializedBcp47( false),
     495      734328 :         mbInitializedLocale( !mbSystemLocale),
     496             :         mbInitializedLangID( false),
     497     2202984 :         mbIsFallback( false)
     498             : {
     499      734328 :     handleVendorVariant( maLocale);
     500      734328 : }
     501             : 
     502             : 
     503     1030781 : LanguageTag::LanguageTag( LanguageType nLanguage )
     504             :     :
     505             :         mnLangID( nLanguage),
     506     1030781 :         mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
     507             :         mbInitializedBcp47( false),
     508             :         mbInitializedLocale( false),
     509     1030781 :         mbInitializedLangID( !mbSystemLocale),
     510     3092343 :         mbIsFallback( false)
     511             : {
     512     1030781 : }
     513             : 
     514             : 
     515        1522 : LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
     516             :                           const OUString& rScript, const OUString& rCountry )
     517             :     :
     518             :         maBcp47( rBcp47),
     519             :         mnLangID( LANGUAGE_DONTKNOW),
     520        1522 :         mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
     521        1522 :         mbInitializedBcp47( !rBcp47.isEmpty()),
     522             :         mbInitializedLocale( false),
     523             :         mbInitializedLangID( false),
     524        4566 :         mbIsFallback( false)
     525             : {
     526        1522 :     if (!mbSystemLocale && !mbInitializedBcp47)
     527             :     {
     528        1522 :         if (rScript.isEmpty())
     529             :         {
     530        1522 :             maBcp47 = rLanguage + "-" + rCountry;
     531        1522 :             mbInitializedBcp47 = true;
     532        1522 :             maLocale.Language = rLanguage;
     533        1522 :             maLocale.Country  = rCountry;
     534        1522 :             mbInitializedLocale = true;
     535             :         }
     536             :         else
     537             :         {
     538           0 :             if (rCountry.isEmpty())
     539           0 :                 maBcp47 = rLanguage + "-" + rScript;
     540             :             else
     541           0 :                 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
     542           0 :             mbInitializedBcp47 = true;
     543           0 :             maLocale.Language = I18NLANGTAG_QLT;
     544           0 :             maLocale.Country  = rCountry;
     545           0 :             maLocale.Variant  = maBcp47;
     546           0 :             mbInitializedLocale = true;
     547             :         }
     548             :     }
     549        1522 : }
     550             : 
     551             : 
                       // Constructs a tag from an rtl_Locale by taking over its Language,
                       // Country and Variant into maLocale. An empty Language marks the system
                       // locale, otherwise the Locale counts as initialized. Afterwards
                       // convertFromRtlLocale() is called (its effect is not visible here).
                       //
                       // NOTE(review): the initializers of mbSystemLocale and
                       // mbInitializedLocale read maLocale respectively mbSystemLocale, so they
                       // rely on the member declaration order in the class - verify in header.
     552         306 : LanguageTag::LanguageTag( const rtl_Locale & rLocale )
     553             :     :
     554             :         maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
     555             :         mnLangID( LANGUAGE_DONTKNOW),
     556         306 :         mbSystemLocale( maLocale.Language.isEmpty()),
     557             :         mbInitializedBcp47( false),
     558         306 :         mbInitializedLocale( !mbSystemLocale),
     559             :         mbInitializedLangID( false),
     560         918 :         mbIsFallback( false)
     561             : {
     562         306 :     convertFromRtlLocale();
     563         306 : }
     564             : 
     565             : 
                       // Copy constructor: member-wise copy of the Locale, the BCP 47 string, the
                       // LangID, the mpImpl pointer and all state flags including mbIsFallback.
     566    20538230 : LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
     567             :     :
     568             :         maLocale( rLanguageTag.maLocale),
     569             :         maBcp47( rLanguageTag.maBcp47),
     570             :         mnLangID( rLanguageTag.mnLangID),
     571             :         mpImpl( rLanguageTag.mpImpl),
     572             :         mbSystemLocale( rLanguageTag.mbSystemLocale),
     573             :         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
     574             :         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
     575             :         mbInitializedLangID( rLanguageTag.mbInitializedLangID),
     576    20538230 :         mbIsFallback(rLanguageTag.mbIsFallback)
     577             : {
     578    20538230 : }
     579             : 
     580             : 
                       // Copy assignment: takes over the Locale, the BCP 47 string, the LangID,
                       // the mpImpl pointer and all state flags of rLanguageTag. Self-assignment
                       // is a no-op.
                       //
                       // @param rLanguageTag  the tag to copy from
                       // @return *this
     581      184993 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
     582             : {
     583      184993 :     if (&rLanguageTag == this)
     584           0 :         return *this;
     585             : 
     586      184993 :     maLocale            = rLanguageTag.maLocale;
     587      184993 :     maBcp47             = rLanguageTag.maBcp47;
     588      184993 :     mnLangID            = rLanguageTag.mnLangID;
     589      184993 :     mpImpl              = rLanguageTag.mpImpl;
     590      184993 :     mbSystemLocale      = rLanguageTag.mbSystemLocale;
     591      184993 :     mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
     592      184993 :     mbInitializedLocale = rLanguageTag.mbInitializedLocale;
     593      184993 :     mbInitializedLangID = rLanguageTag.mbInitializedLangID;
                           // Keep assignment in line with the copy constructor, which copies the
                           // fallback flag as well; without this the target would keep its own
                           // stale mbIsFallback value. (Line added in review, hence no coverage
                           // data in the gutter.)
                           mbIsFallback        = rLanguageTag.mbIsFallback;
     594      184993 :     return *this;
     595             : }
     596             : 
     597             : 
                       // Destructor with an empty body; nothing is released explicitly here.
     598    22712425 : LanguageTag::~LanguageTag()
     599             : {
     600    22712425 : }
     601             : 
     602             : 
                       // Registers this impl under its BCP 47 string and assigns it a LangID,
                       // entering it into the LangID map as well.
                       //
                       // @param nRegisterID
                       //        Suggested LangID. 0 or LANGUAGE_DONTKNOW requests a generated ID
                       //        from getNextOnTheFlyLanguage(). A suggestion that is already
                       //        mapped to a different BCP 47 string is replaced by a generated
                       //        ID, too.
                       // @return
                       //        The impl found or created for maBcp47, which may be an already
                       //        registered impl other than this. An empty pointer if no BCP 47
                       //        string is available. If no ID could be generated the impl is
                       //        returned without entering it into the LangID map.
     603        7716 : LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
     604             : {
     605        7716 :     LanguageTag::ImplPtr pImpl;
     606             : 
                           // Derive the BCP 47 string from the Locale if only the latter is set.
     607        7716 :     if (!mbInitializedBcp47)
     608             :     {
     609           0 :         if (mbInitializedLocale)
     610             :         {
     611           0 :             maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
     612           0 :             mbInitializedBcp47 = !maBcp47.isEmpty();
     613             :         }
     614             :     }
     615        7716 :     if (maBcp47.isEmpty())
     616             :     {
     617             :         SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
     618           0 :         return pImpl;
     619             :     }
     620             : 
                           // All map accesses below happen with theMutex held.
     621       15432 :     osl::MutexGuard aGuard( theMutex::get());
     622             : 
     623        7716 :     MapBcp47& rMapBcp47 = theMapBcp47::get();
     624        7716 :     MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
     625        7716 :     bool bOtherImpl = false;
     626        7716 :     if (it != rMapBcp47.end())
     627             :     {
     628             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
     629        7716 :         pImpl = (*it).second;
     630        7716 :         if (pImpl.get() != this)
     631             :         {
     632             :             // Could happen for example if during registerImpl() the tag was
     633             :             // changed via canonicalize() and the result was already present in
     634             :             // the map before, for example 'bn-Beng' => 'bn'. This specific
     635             :             // case is now taken care of in registerImpl() and doesn't reach
     636             :             // here. However, use the already existing impl if it matches.
     637             :             SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
     638           0 :             *this = *pImpl;     // ensure consistency
     639           0 :             bOtherImpl = true;
     640             :         }
     641             :     }
     642             :     else
     643             :     {
     644             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
     645           0 :         pImpl.reset( new LanguageTagImpl( *this));
     646           0 :         rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
     647             :     }
     648             : 
                           // Assign an ID unless another impl was adopted that already has one.
     649        7716 :     if (!bOtherImpl || !pImpl->mbInitializedLangID)
     650             :     {
     651        7716 :         if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
     652         252 :             nRegisterID = getNextOnTheFlyLanguage();
     653             :         else
     654             :         {
     655             :             // Accept a suggested ID only if it is not mapped yet to something
     656             :             // different, otherwise we would end up with ambiguous assignments
     657             :             // of different language tags, for example for the same primary
     658             :             // LangID with "no", "nb" and "nn".
     659        7464 :             const MapLangID& rMapLangID = theMapLangID::get();
     660        7464 :             MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
     661        7464 :             if (itID != rMapLangID.end())
     662             :             {
     663           2 :                 if ((*itID).second->maBcp47 != maBcp47)
     664             :                 {
     665             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
     666             :                             << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
     667             :                             << (*itID).second->maBcp47 << "'");
     668           2 :                     nRegisterID = getNextOnTheFlyLanguage();
     669             :                 }
     670             :                 else
     671             :                 {
     672             :                     SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
     673             :                             << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
     674             :                 }
     675             :             }
     676             :         }
     677        7716 :         if (!nRegisterID)
     678             :         {
     679             :             // out of IDs, nothing to register
     680           0 :             return pImpl;
     681             :         }
     682        7716 :         pImpl->mnLangID = nRegisterID;
     683        7716 :         pImpl->mbInitializedLangID = true;
                               // If the registered impl is a different object, mirror the ID
                               // in this instance as well.
     684        7716 :         if (pImpl.get() != this)
     685             :         {
     686           0 :             mnLangID = nRegisterID;
     687           0 :             mbInitializedLangID = true;
     688             :         }
     689             :     }
     690             : 
                           // Enter the impl into the LangID map, too. The insert result tells
                           // whether an entry for that ID existed already; both outcomes are
                           // only logged.
     691             :     ::std::pair< MapLangID::const_iterator, bool > res(
     692        7716 :             theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
     693        7716 :     if (res.second)
     694             :     {
     695             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
     696             :                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
     697             :     }
     698             :     else
     699             :     {
     700             :         SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
     701             :                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
     702             :                 << (*res.first).second->maBcp47 << "'");
     703             :     }
     704             : 
     705        7716 :     return pImpl;
     706             : }
     707             : 
                       // Sets the configured system language and re-initializes the cached system
                       // locale impl accordingly.
                       //
                       // @param nLang
                       //        The LangID to configure. LANGUAGE_DONTKNOW and LANGUAGE_SYSTEM are
                       //        refused with a warning and leave everything unchanged.
     708             : // static
     709         472 : void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
     710             : {
     711         472 :     if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
     712             :     {
     713             :         SAL_WARN( "i18nlangtag",
     714             :                 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
     715             :                 ::std::hex << nLang);
     716           0 :         return;
     717             :     }
     718             :     SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
     719         472 :     MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
     720             :     // Reset system locale to none and let registerImpl() do the rest to
     721             :     // initialize a new one.
     722         472 :     theSystemLocale::get().reset();
     723         472 :     LanguageTag aLanguageTag( LANGUAGE_SYSTEM);
     724         472 :     aLanguageTag.registerImpl();
     725             : }
     726             : 
                       // Returns true if nLang is neither LANGUAGE_DONTKNOW nor LANGUAGE_SYSTEM
                       // and either LanguageTag::isOnTheFlyID() accepts it or it is identical to
                       // its own MsLangId::getPrimaryLanguage() value (presumably an ID without
                       // a sub-language part - verify against MsLangId).
     727      224671 : static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
     728             : {
     729      459478 :     return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
     730      673743 :         (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
     731             : }
     732             : 
     733             : 
                       // Obtains the LanguageTagImpl matching the current state of this tag,
                       // creating and registering a new impl if none exists yet.
                       //
                       // Order of the lookups as implemented below:
                       //  1. System locale: theSystemLocale if already set, otherwise the LangID
                       //     is resolved via MsLangId::getRealLanguage( LANGUAGE_SYSTEM).
                       //  2. Initialized LangID equal to LANGUAGE_DONTKNOW: the single
                       //     theDontKnow impl, created on first use.
                       //  3. LangID or BCP 47 string equal to that of theSystemLocale:
                       //     theSystemLocale.
                       //  4. With theMutex held: the LangID map if a LangID is initialized, else
                       //     the Bcp47 map. A newly created impl is cross-inserted into the
                       //     respective other map if the conversion round-trips (or, coming from
                       //     BCP 47, if lcl_isKnownOnTheFlyID() accepts the resulting LangID).
                       //
                       // Although const, this may assign mnLangID, maBcp47 and their initialized
                       // flags (presumably declared mutable - verify in header).
                       //
                       // @return the impl to use; if neither a LangID nor a BCP 47 string is
                       //         available a new impl that is not entered into any map.
                       //
                       // NOTE(review): theSystemLocale and theDontKnow are accessed (theDontKnow
                       // also assigned) before theMutex is acquired further down; confirm that
                       // this is intended.
     734     4221127 : LanguageTag::ImplPtr LanguageTag::registerImpl() const
     735             : {
     736             :     // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
     737             :     // here as they access getImpl() and syncFromImpl() and would lead to
     738             :     // recursion. Also do not use the static LanguageTag::convertTo...()
     739             :     // methods as they may create temporary LanguageTag instances. Only
     740             :     // LanguageTagImpl::convertToBcp47(Locale) is ok.
     741             : 
     742     4221127 :     ImplPtr pImpl;
     743             : 
     744             : #if OSL_DEBUG_LEVEL > 0
     745             :     static size_t nCalls = 0;
     746             :     ++nCalls;
     747             :     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
     748             : #endif
     749             : 
     750             :     // Do not register unresolved system locale, also force LangID if system
     751             :     // and take the system locale shortcut if possible.
     752     4221127 :     if (mbSystemLocale)
     753             :     {
     754      167056 :         pImpl = theSystemLocale::get();
     755      167056 :         if (pImpl)
     756             :         {
     757             : #if OSL_DEBUG_LEVEL > 0
     758             :             static size_t nCallsSystem = 0;
     759             :             ++nCallsSystem;
     760             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
     761             : #endif
     762      166569 :             return pImpl;
     763             :         }
     764         487 :         if (!mbInitializedLangID)
     765             :         {
     766         487 :             mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
     767         487 :             mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
     768             :             SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
     769             :         }
     770             :     }
     771             : 
     772     4054558 :     if (mbInitializedLangID)
     773             :     {
     774     2989184 :         if (mnLangID == LANGUAGE_DONTKNOW)
     775             :         {
     776             :             // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
     777             :             // conversion attempts. At the same time provide a central breakpoint
     778             :             // to inspect such places.
     779     1626352 :             LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
     780     1626352 :             if (!rDontKnow)
     781         296 :                 rDontKnow.reset( new LanguageTagImpl( *this));
     782     1626352 :             pImpl = rDontKnow;
     783             : #if OSL_DEBUG_LEVEL > 0
     784             :             static size_t nCallsDontKnow = 0;
     785             :             ++nCallsDontKnow;
     786             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
     787             : #endif
     788     1626352 :             return pImpl;
     789             :         }
     790             :         else
     791             :         {
     792             :             // A great share are calls for a system equal locale.
     793     1362832 :             pImpl = theSystemLocale::get();
     794     1362832 :             if (pImpl && pImpl->mnLangID == mnLangID)
     795             :             {
     796             : #if OSL_DEBUG_LEVEL > 0
     797             :                 static size_t nCallsSystemEqual = 0;
     798             :                 ++nCallsSystemEqual;
     799             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
     800             :                         << " system equal LangID calls");
     801             : #endif
     802      867896 :                 return pImpl;
     803             :             }
     804             :         }
     805             :     }
     806             : 
     807             :     // Force Bcp47 if not LangID.
     808     1560310 :     if (!mbInitializedLangID && !mbInitializedBcp47 && mbInitializedLocale)
     809             :     {
     810      659091 :         maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
     811      659091 :         mbInitializedBcp47 = !maBcp47.isEmpty();
     812             :     }
     813             : 
     814     1560310 :     if (mbInitializedBcp47)
     815             :     {
     816             :         // A great share are calls for a system equal locale.
     817     1065704 :         pImpl = theSystemLocale::get();
     818     1065704 :         if (pImpl && pImpl->maBcp47 == maBcp47)
     819             :         {
     820             : #if OSL_DEBUG_LEVEL > 0
     821             :             static size_t nCallsSystemEqual = 0;
     822             :             ++nCallsSystemEqual;
     823             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
     824             : #endif
     825      427486 :             return pImpl;
     826             :         }
     827             :     }
     828             : 
     829             : #if OSL_DEBUG_LEVEL > 0
     830             :     static size_t nCallsNonSystem = 0;
     831             :     ++nCallsNonSystem;
     832             :     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
     833             : #endif
     834             : 
                           // From here on the maps are accessed with theMutex held.
     835     2265648 :     osl::MutexGuard aGuard( theMutex::get());
     836             : 
     837             : #if OSL_DEBUG_LEVEL > 0
     838             :     static long nRunning = 0;
     839             :     // Entering twice here is ok, which is needed for fallback init in
     840             :     // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
     841             :     // everything else is suspicious.
     842             :     SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
     843             :             << maBcp47 << "' 0x" << ::std::hex << mnLangID );
     844             :     struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
     845             : #endif
     846             : 
     847             :     // Prefer LangID map as find+insert needs less comparison work.
     848     1132824 :     if (mbInitializedLangID)
     849             :     {
     850      494936 :         MapLangID& rMap = theMapLangID::get();
     851      494936 :         MapLangID::const_iterator it( rMap.find( mnLangID));
     852      494936 :         if (it != rMap.end())
     853             :         {
     854             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
     855      484452 :             pImpl = (*it).second;
     856             :         }
     857             :         else
     858             :         {
     859             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
     860       10484 :             pImpl.reset( new LanguageTagImpl( *this));
     861       10484 :             rMap.insert( ::std::make_pair( mnLangID, pImpl));
     862             :             // Try round-trip.
     863       10484 :             if (!pImpl->mbInitializedLocale)
     864       10462 :                 pImpl->convertLangToLocale();
     865       10484 :             LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
     866             :             // If round-trip is identical cross-insert to Bcp47 map.
     867       10484 :             if (nLang == pImpl->mnLangID)
     868             :             {
     869        9850 :                 if (!pImpl->mbInitializedBcp47)
     870        9828 :                     pImpl->convertLocaleToBcp47();
     871             :                 ::std::pair< MapBcp47::const_iterator, bool > res(
     872        9850 :                         theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
     873        9850 :                 if (res.second)
     874             :                 {
     875             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID);
     876             :                 }
     877             :                 else
     878             :                 {
     879             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " have 0x"
     880             :                             << ::std::hex << (*res.first).second->mnLangID);
     881             :                 }
     882             :             }
     883             :             else
     884             :             {
                                       // No round-trip: the BCP 47 string is still computed for
                                       // the impl but the impl is not entered into the Bcp47 map.
     885         634 :                 if (!pImpl->mbInitializedBcp47)
     886         634 :                     pImpl->convertLocaleToBcp47();
     887             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "' for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
     888             :             }
     889             :         }
     890             :     }
     891      637888 :     else if (!maBcp47.isEmpty())
     892             :     {
     893      637888 :         MapBcp47& rMap = theMapBcp47::get();
     894      637888 :         MapBcp47::const_iterator it( rMap.find( maBcp47));
     895      637888 :         if (it != rMap.end())
     896             :         {
     897             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
     898      396794 :             pImpl = (*it).second;
     899             :         }
     900             :         else
     901             :         {
     902             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
     903      241094 :             pImpl.reset( new LanguageTagImpl( *this));
     904      241094 :             ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
     905             :             // If changed after canonicalize() also add the resulting tag to
     906             :             // the map.
     907      241094 :             if (pImpl->synCanonicalize())
     908             :             {
     909             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
     910             :                 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
     911       23894 :                         rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
     912             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
     913             :                         << "inserted '" << pImpl->maBcp47 << "'");
     914             :                 // If the canonicalized tag already existed (was not inserted)
     915             :                 // and impls are different, make this impl that impl and skip
     916             :                 // the rest if that LangID is present as well. The existing
     917             :                 // entry may or may not be different, it may even be strictly
     918             :                 // identical to this if it differs only in case (e.g. ko-kr =>
     919             :                 // ko-KR) which was corrected in canonicalize() hence also in
     920             :                 // the map entry but comparison is case insensitive and found
     921             :                 // it again.
     922       23894 :                 if (!insCanon.second && (*insCanon.first).second != pImpl)
     923             :                 {
                                           // Redirect the entry of the original string to the
                                           // existing impl and continue with that one.
     924       16423 :                     (*insOrig.first).second = pImpl = (*insCanon.first).second;
     925             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
     926             :                             << ::std::hex << pImpl->mnLangID);
     927             :                 }
     928             :             }
     929      241094 :             if (!pImpl->mbInitializedLangID)
     930             :             {
     931             :                 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
     932      224671 :                 if (!pImpl->mbInitializedLocale)
     933      224493 :                     pImpl->convertBcp47ToLocale();
     934      224671 :                 if (!pImpl->mbInitializedLangID)
     935      224671 :                     pImpl->convertLocaleToLang( true);
     936             :                 // Unconditionally insert (round-trip is possible) for
     937             :                 // on-the-fly IDs and (generated or not) suggested IDs.
     938      224671 :                 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
     939      224671 :                 OUString aBcp47;
     940      224671 :                 if (!bInsert)
     941             :                 {
     942      214527 :                     if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
     943             :                     {
     944             :                         // May have involved canonicalize(), so compare with
     945             :                         // pImpl->maBcp47 instead of maBcp47!
     946      429038 :                         aBcp47 = LanguageTagImpl::convertToBcp47(
     947      429038 :                                 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
     948      214519 :                         bInsert = (aBcp47 == pImpl->maBcp47);
     949             :                     }
     950             :                 }
     951             :                 // If round-trip is identical cross-insert to Bcp47 map.
     952      224671 :                 if (bInsert)
     953             :                 {
     954             :                     ::std::pair< MapLangID::const_iterator, bool > res(
     955      212315 :                             theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
     956      212315 :                     if (res.second)
     957             :                     {
     958             :                         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
     959             :                                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
     960             :                     }
     961             :                     else
     962             :                     {
     963             :                         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
     964             :                                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
     965             :                                 << (*res.first).second->maBcp47 << "'");
     966             :                     }
     967             :                 }
     968             :                 else
     969             :                 {
     970             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
     971             :                             << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
     972             :                             << aBcp47 << "'");
     973      224671 :                 }
     974             :             }
     975             :         }
     976             :     }
     977             :     else
     978             :     {
                               // Neither a LangID nor a BCP 47 string: hand out a fresh impl that
                               // is not entered into any map.
     979             :         SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
     980           0 :         pImpl.reset( new LanguageTagImpl( *this));
     981             :     }
     982             : 
     983             :     // If we reach here for mbSystemLocale we didn't have theSystemLocale
     984             :     // above, so add it.
     985     1132824 :     if (mbSystemLocale && mbInitializedLangID)
     986             :     {
     987         487 :         theSystemLocale::get() = pImpl;
     988             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
     989             :                 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
     990             :     }
     991             : 
     992     1132824 :     return pImpl;
     993             : }
     994             : 
     995             : 
                       // Returns the impl of this tag, obtaining it lazily: if mpImpl is not set
                       // yet it is fetched via registerImpl() and syncVarsFromRawImpl() is called
                       // afterwards. Assigns mpImpl although const (presumably a mutable member -
                       // verify in header).
     996     6527773 : LanguageTag::ImplPtr LanguageTag::getImpl() const
     997             : {
     998     6527773 :     if (!mpImpl)
     999             :     {
    1000     4220325 :         mpImpl = registerImpl();
    1001     4220325 :         syncVarsFromRawImpl();
    1002             :     }
    1003     6527773 :     return mpImpl;
    1004             : }
    1005             : 
    1006             : 
                       // Puts all members back to the state of an unresolved system locale: no
                       // impl, empty Locale and BCP 47 string, LANGUAGE_SYSTEM, mbSystemLocale
                       // set, none of the initialized flags set and not a fallback.
    1007     5617621 : void LanguageTag::resetVars()
    1008             : {
    1009     5617621 :     mpImpl.reset();
    1010     5617621 :     maLocale            = lang::Locale();
    1011     5617621 :     maBcp47             = OUString();
    1012     5617621 :     mnLangID            = LANGUAGE_SYSTEM;
    1013     5617621 :     mbSystemLocale      = true;
    1014     5617621 :     mbInitializedBcp47  = false;
    1015     5617621 :     mbInitializedLocale = false;
    1016     5617621 :     mbInitializedLangID = false;
    1017     5617621 :     mbIsFallback        = false;
    1018     5617621 : }
    1019             : 
    1020             : 
                       // Re-initializes this tag from a BCP 47 string after clearing all previous
                       // state via resetVars().
                       //
                       // @param rBcp47LanguageTag
                       //        The new tag string; an empty string denotes the system locale.
                       // @param bCanonicalize
                       //        If true the impl is obtained right away, canonicalize() is called
                       //        on it and the members are synced from it.
                       // @return *this
    1021         871 : LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
    1022             : {
    1023         871 :     resetVars();
    1024         871 :     maBcp47             = rBcp47LanguageTag;
    1025         871 :     mbSystemLocale      = rBcp47LanguageTag.isEmpty();
    1026         871 :     mbInitializedBcp47  = !mbSystemLocale;
    1027             : 
    1028         871 :     if (bCanonicalize)
    1029             :     {
    1030           0 :         getImpl()->canonicalize();
    1031             :         // Registration itself may already have canonicalized, so do an
    1032             :         // unconditional sync.
    1033           0 :         syncFromImpl();
    1034             :     }
    1035         871 :     return *this;
    1036             : }
    1037             : 
    1038             : 
                       // Re-initializes this tag from a Locale after clearing all previous state
                       // via resetVars(). An empty Language denotes the system locale. The stored
                       // Locale is then passed to handleVendorVariant() (effect not visible here).
                       //
                       // @param rLocale  the new locale
                       // @return *this
    1039         666 : LanguageTag & LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
    1040             : {
    1041         666 :     resetVars();
    1042         666 :     maLocale            = rLocale;
    1043         666 :     mbSystemLocale      = rLocale.Language.isEmpty();
    1044         666 :     mbInitializedLocale = !mbSystemLocale;
    1045         666 :     handleVendorVariant( maLocale);
    1046         666 :     return *this;
    1047             : }
    1048             : 
    1049             : 
                       // Re-initializes this tag from a LangID after clearing all previous state
                       // via resetVars(). LANGUAGE_SYSTEM denotes the system locale, in which
                       // case the LangID is not flagged as initialized.
                       //
                       // @param nLanguage  the new LangID
                       // @return *this
    1050     5616084 : LanguageTag & LanguageTag::reset( LanguageType nLanguage )
    1051             : {
    1052     5616084 :     resetVars();
    1053     5616084 :     mnLangID            = nLanguage;
    1054     5616084 :     mbSystemLocale      = nLanguage == LANGUAGE_SYSTEM;
    1055     5616084 :     mbInitializedLangID = !mbSystemLocale;
    1056     5616084 :     return *this;
    1057             : }
    1058             : 
    1059             : 
    1060      267284 : bool LanguageTagImpl::canonicalize()
    1061             : {
                           // Canonicalize maBcp47 and decide whether liblangtag is needed for
                           // this tag.
                           //
                           // Returns true if maBcp47 was modified (casing rebuilt, replaced by
                           // an internal override, or replaced by liblangtag's canonical form).
                           // meIsValid is set on every exit path. meIsIsoLocale and meIsIsoODF
                           // are reset to DECISION_DONTKNOW when the string is replaced by an
                           // override or by liblangtag's result.
    1062             : #ifdef erDEBUG
    1063             :     // dump once
    1064             :     struct dumper
    1065             :     {
    1066             :         lt_tag_t** mpp;
    1067             :         dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
    1068             :         ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
    1069             :     };
    1070             :     dumper aDumper( &mpImplLangtag);
    1071             : #endif
    1072             : 
    1073      267284 :     bool bChanged = false;
    1074             : 
    1075             :     // Side effect: have maBcp47 in any case, resolved system.
    1076             :     // Some methods calling canonicalize() (or not calling it due to
    1077             :     // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
    1078             :     // meIsLiblangtagNeeded anywhere else than hereafter.
    1079      267284 :     getBcp47();
    1080             : 
    1081             :     // The simple cases and known locales don't need liblangtag processing,
    1082             :     // which also avoids loading liblangtag data on startup.
    1083      267284 :     if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
    1084             :     {
                               // These flags record that maLocale / mnLangID were filled in
                               // only as scratch values for the decision below; they are
                               // reverted before leaving this branch.
    1085      251681 :         bool bTemporaryLocale = false;
    1086      251681 :         bool bTemporaryLangID = false;
    1087      251681 :         if (!mbInitializedLocale && !mbInitializedLangID)
    1088             :         {
    1089      240916 :             if (mbSystemLocale)
    1090             :             {
    1091           0 :                 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1092           0 :                 mbInitializedLangID = true;
    1093             :             }
    1094             :             else
    1095             :             {
    1096             :                 // Now this is getting funny.. we only have some BCP47 string
    1097             :                 // and want to determine if parsing it would be possible
    1098             :                 // without using liblangtag just to see if it is a simple known
    1099             :                 // locale or could fall back to one.
    1100      481832 :                 OUString aLanguage, aScript, aCountry, aVariants;
    1101      240916 :                 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
    1102      240916 :                 if (eExt != EXTRACTED_NONE)
    1103             :                 {
    1104      240902 :                     if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
    1105             :                     {
    1106             :                         // Rebuild bcp47 with proper casing of tags.
    1107      240874 :                         OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
    1108      240874 :                                 1 + aCountry.getLength() + 1 + aVariants.getLength());
    1109      240874 :                         aBuf.append( aLanguage);
    1110      240874 :                         if (!aScript.isEmpty())
    1111       28874 :                             aBuf.append("-" + aScript);
    1112      240874 :                         if (!aCountry.isEmpty())
    1113      208653 :                             aBuf.append("-" + aCountry);
    1114      240874 :                         if (!aVariants.isEmpty())
    1115         904 :                             aBuf.append("-" + aVariants);
    1116      481748 :                         OUString aStr( aBuf.makeStringAndClear());
    1117             : 
    1118      240874 :                         if (maBcp47 != aStr)
    1119             :                         {
    1120        2022 :                             maBcp47 = aStr;
    1121        2022 :                             bChanged = true;
    1122      240874 :                         }
    1123             :                     }
                                       // Language plus optional country without script fits
                                       // Language/Country directly; anything else is stored
                                       // with the I18NLANGTAG_QLT language and the full tag
                                       // in Variant.
    1124      240902 :                     if (eExt == EXTRACTED_LSC && aScript.isEmpty())
    1125             :                     {
    1126      211096 :                         maLocale.Language = aLanguage;
    1127      211096 :                         maLocale.Country  = aCountry;
    1128             :                     }
    1129             :                     else
    1130             :                     {
    1131       29806 :                         maLocale.Language = I18NLANGTAG_QLT;
    1132       29806 :                         maLocale.Country  = aCountry;
    1133       29806 :                         maLocale.Variant  = maBcp47;
    1134             :                     }
    1135      240902 :                     bTemporaryLocale = mbInitializedLocale = true;
    1136      240916 :                 }
    1137             :             }
    1138             :         }
    1139      251681 :         if (mbInitializedLangID && !mbInitializedLocale)
    1140             :         {
    1141             :             // Do not call getLocale() here because that prefers
    1142             :             // convertBcp47ToLocale() which would end up in recursion via
    1143             :             // isIsoLocale()!
    1144             : 
    1145             :             // Prepare to verify that we have a known locale, not just an
    1146             :             // arbitrary MS-LangID.
    1147           0 :             convertLangToLocale();
    1148             :         }
    1149      251681 :         if (mbInitializedLocale)
    1150             :         {
    1151      251667 :             if (maLocale.Variant.isEmpty())
    1152      220313 :                 meIsLiblangtagNeeded = DECISION_NO;     // per definition ll[l][-CC]
    1153             :             else
    1154             :             {
    1155       31354 :                 if (!mbInitializedLangID)
    1156             :                 {
    1157       29806 :                     convertLocaleToLang( false);
    1158       29806 :                     if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
    1159       29806 :                         bTemporaryLangID = true;
    1160             :                 }
    1161       31354 :                 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
    1162       31326 :                     meIsLiblangtagNeeded = DECISION_NO; // known locale
    1163             :                 else
    1164             :                 {
    1165          28 :                     const KnownTagSet& rKnowns = getKnowns();
    1166          28 :                     if (rKnowns.find( maBcp47) != rKnowns.end())
    1167           2 :                         meIsLiblangtagNeeded = DECISION_NO; // known fallback
    1168             :                 }
    1169             :             }
    1170             :             // We may have an internal override "canonicalization".
    1171      251667 :             lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
    1172      517840 :             if (!aNew.Language.isEmpty() &&
    1173      466173 :                     (aNew.Language != maLocale.Language ||
    1174      443975 :                      aNew.Country  != maLocale.Country ||
    1175      221777 :                      aNew.Variant  != maLocale.Variant))
    1176             :             {
    1177       22198 :                 maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
    1178       22198 :                 bChanged = true;
    1179       22198 :                 meIsIsoLocale = DECISION_DONTKNOW;
    1180       22198 :                 meIsIsoODF = DECISION_DONTKNOW;
    1181       22198 :                 meIsLiblangtagNeeded = DECISION_NO; // known locale
    1182      251667 :             }
    1183             :         }
                               // Revert the scratch locale / language ID so that only maBcp47
                               // and the decision members keep what was determined above.
    1184      251681 :         if (bTemporaryLocale)
    1185             :         {
    1186      240902 :             mbInitializedLocale = false;
    1187      240902 :             maLocale = lang::Locale();
    1188             :         }
    1189      251681 :         if (bTemporaryLangID)
    1190             :         {
    1191       29806 :             mbInitializedLangID = false;
    1192       29806 :             mnLangID = LANGUAGE_DONTKNOW;
    1193             :         }
    1194             :     }
    1195      267284 :     if (meIsLiblangtagNeeded == DECISION_NO)
    1196             :     {
    1197      267240 :         meIsValid = DECISION_YES;   // really, known must be valid ...
    1198      267240 :         return bChanged;            // that's it
    1199             :     }
    1200             : 
                           // Not decided as simple or known: from here on liblangtag parses
                           // and canonicalizes the tag.
    1201          44 :     meIsLiblangtagNeeded = DECISION_YES;
    1202             :     SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
    1203             : 
                           // Create the lt_tag_t on first use; incRef() is called on
                           // theDataRef once per created tag object.
    1204          44 :     if (!mpImplLangtag)
    1205             :     {
    1206          40 :         theDataRef::get().incRef();
    1207          40 :         mpImplLangtag = lt_tag_new();
    1208             :     }
    1209             : 
    1210          44 :     myLtError aError;
    1211             : 
    1212          44 :     if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    1213             :     {
                               // pTag is released with free() on every path that obtained it.
    1214          32 :         char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
    1215             :         SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
    1216          32 :         if (pTag)
    1217             :         {
    1218          32 :             OUString aNew( OUString::createFromAscii( pTag));
    1219             :             // Make the lt_tag_t follow the new string if different, which
    1220             :             // removes default script and such.
    1221          32 :             if (maBcp47 != aNew)
    1222             :             {
    1223           4 :                 maBcp47 = aNew;
    1224           4 :                 bChanged = true;
    1225           4 :                 meIsIsoLocale = DECISION_DONTKNOW;
    1226           4 :                 meIsIsoODF = DECISION_DONTKNOW;
    1227           4 :                 if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
    1228             :                 {
    1229             :                     SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
    1230           0 :                     free( pTag);
    1231           0 :                     meIsValid = DECISION_NO;
    1232           0 :                     return bChanged;
    1233             :                 }
    1234             :             }
    1235          32 :             free( pTag);
    1236          32 :             meIsValid = DECISION_YES;
    1237          32 :             return bChanged;
    1238             :         }
    1239             :     }
    1240             :     else
    1241             :     {
    1242             :         SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
    1243             :     }
                           // Reached when parsing failed or no canonical string was returned.
    1244          12 :     meIsValid = DECISION_NO;
    1245          12 :     return bChanged;
    1246             : }
    1247             : 
    1248             : 
    1249     1327831 : bool LanguageTagImpl::synCanonicalize()
    1250             : {
                           // Run canonicalize() unless liblangtag was already decided to be
                           // unnecessary or an lt_tag_t already exists. Returns true if
                           // canonicalize() changed maBcp47.
    1251     1327831 :     bool bChanged = false;
    1252     1327831 :     if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
    1253             :     {
    1254      251681 :         bChanged = canonicalize();
    1255      251681 :         if (bChanged)
    1256             :         {
                                   // The string changed: re-derive the locale and language
                                   // ID from it, but only if they had been initialized.
    1257       24224 :             if (mbInitializedLocale)
    1258         330 :                 convertBcp47ToLocale();
    1259       24224 :             if (mbInitializedLangID)
    1260         330 :                 convertBcp47ToLang();
    1261             :         }
    1262             :     }
    1263     1327831 :     return bChanged;
    1264             : }
    1265             : 
    1266             : 
    1267      629120 : void LanguageTag::syncFromImpl()
    1268             : {
                           // Copy the impl's state into this instance and, if this instance's
                           // initialized BCP47 string or language ID differed from the impl's
                           // before the copy, replace mpImpl with the result of registerImpl().
    1269      629120 :     ImplPtr xImpl = getImpl();
    1270      629120 :     LanguageTagImpl* pImpl = xImpl.get();
                           // Must be evaluated before syncVarsFromRawImpl() overwrites
                           // maBcp47 and mnLangID with the impl's values.
    1271      629450 :     bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
    1272     1257910 :             (mbInitializedLangID && mnLangID != pImpl->mnLangID));
    1273             :     SAL_INFO_IF( bRegister, "i18nlangtag",
    1274             :             "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
    1275             :             " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
    1276      629120 :     syncVarsFromRawImpl();
    1277      629120 :     if (bRegister)
    1278         330 :         mpImpl = registerImpl();
    1279      629120 : }
    1280             : 
    1281             : 
    1282     1753798 : void LanguageTag::syncVarsFromImpl() const
    1283             : {
                           // Refresh this instance's mutable members from the impl. Without
                           // an mpImpl yet, getImpl() is called instead of copying directly.
    1284     1753798 :     if (!mpImpl)
    1285     1753798 :         getImpl();      // with side effect syncVarsFromRawImpl()
    1286             :     else
    1287           0 :         syncVarsFromRawImpl();
    1288     1753798 : }
    1289             : 
    1290             : 
    1291     4849445 : void LanguageTag::syncVarsFromRawImpl() const
    1292             : {
                           // Copy the BCP47 string, locale and language ID, together with
                           // their "initialized" flags, from the current mpImpl into this
                           // instance. Does nothing if mpImpl is null.
    1293             :     // Do not use getImpl() here.
    1294     4849445 :     LanguageTagImpl* pImpl = mpImpl.get();
    1295     4849445 :     if (!pImpl)
    1296     4849445 :         return;
    1297             : 
    1298             :     // Obviously only mutable variables.
    1299     4849445 :     mbInitializedBcp47  = pImpl->mbInitializedBcp47;
    1300     4849445 :     maBcp47             = pImpl->maBcp47;
    1301     4849445 :     mbInitializedLocale = pImpl->mbInitializedLocale;
    1302     4849445 :     maLocale            = pImpl->maLocale;
    1303     4849445 :     mbInitializedLangID = pImpl->mbInitializedLangID;
    1304     4849445 :     mnLangID            = pImpl->mnLangID;
    1305             : }
    1306             : 
    1307             : 
    1308           0 : bool LanguageTag::synCanonicalize()
    1309             : {
                           // Forward to the impl's synCanonicalize() and, if it reports a
                           // change, pull the updated state back via syncFromImpl().
                           // Returns whether the impl reported a change.
    1310           0 :     bool bChanged = getImpl()->synCanonicalize();
    1311           0 :     if (bChanged)
    1312           0 :         syncFromImpl();
    1313           0 :     return bChanged;
    1314             : }
    1315             : 
    1316             : 
    1317       10758 : void LanguageTagImpl::convertLocaleToBcp47()
    1318             : {
                           // Derive maBcp47 from maLocale and mark the string as initialized.
                           // For a system locale that is not initialized yet, the locale is
                           // first obtained via convertLangToLocale().
    1319       10758 :     if (mbSystemLocale && !mbInitializedLocale)
    1320           0 :         convertLangToLocale();
    1321             : 
    1322       10758 :     if (maLocale.Language.isEmpty())
    1323             :     {
    1324             :         // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
    1325             :         // locale via LanguageTag::convertToBcp47(LanguageType) and
    1326             :         // LanguageTag::convertToLocale(LanguageType) would instantiate another
    1327             :         // LanguageTag.
    1328           0 :         maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, true);
    1329             :     }
    1330       10758 :     if (maLocale.Language.isEmpty())
    1331             :     {
    1332           0 :         maBcp47 = OUString();   // bad luck
    1333             :     }
    1334       10758 :     else if (maLocale.Language == I18NLANGTAG_QLT)
    1335             :     {
                               // With the I18NLANGTAG_QLT language the Variant field holds
                               // the complete tag string, and the tag is recorded as not
                               // being an ISO locale.
    1336        1626 :         maBcp47 = maLocale.Variant;
    1337        1626 :         meIsIsoLocale = DECISION_NO;
    1338             :     }
    1339             :     else
    1340             :     {
    1341        9132 :         maBcp47 = LanguageTag::convertToBcp47( maLocale, true);
    1342             :     }
    1343       10758 :     mbInitializedBcp47 = true;
    1344       10758 : }
    1345             : 
    1346             : 
    1347      254807 : void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
    1348             : {
                           // Derive mnLangID from maLocale (or from the real system language
                           // for a system locale) and mark the ID as initialized.
                           //
                           // bAllowOnTheFlyID: if true and the locale maps to
                           // LANGUAGE_DONTKNOW, a valid BCP47 tag is passed on to
                           // registerOnTheFly(); an invalid one only produces a warning and
                           // mnLangID stays LANGUAGE_DONTKNOW.
    1349      254807 :     if (mbSystemLocale)
    1350             :     {
    1351           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1352             :     }
    1353             :     else
    1354             :     {
    1355      254807 :         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
    1356      254807 :         if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
    1357             :         {
    1358        7724 :             if (isValidBcp47())
    1359             :             {
    1360             :                 // For language-only (including script) look if we know some
    1361             :                 // locale of that language and if so try to use the primary
    1362             :                 // language ID of that instead of generating an on-the-fly ID.
    1363        7716 :                 if (getCountry().isEmpty() && isIsoODF())
    1364             :                 {
    1365        7468 :                     lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
    1366             :                     // 'en-US' is last resort, do not use except when looking
    1367             :                     // for 'en'.
    1368        7468 :                     if (aLoc.Language != "en" || getLanguage() == "en")
    1369             :                     {
    1370        7464 :                         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
    1371        7464 :                         if (mnLangID != LANGUAGE_DONTKNOW)
    1372        7464 :                             mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
    1373        7468 :                     }
    1374             :                 }
                                   // mnLangID is passed in as it stands here, i.e. either
                                   // the primary language ID found above or still
                                   // LANGUAGE_DONTKNOW.
    1375        7716 :                 registerOnTheFly( mnLangID);
    1376             :             }
    1377             :             else
    1378             :             {
    1379             :                 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
    1380             :                         << maBcp47 << "'");
    1381             :             }
    1382             :         }
    1383             :     }
    1384      254807 :     mbInitializedLangID = true;
    1385      254807 : }
    1386             : 
    1387             : 
    1388           0 : void LanguageTag::convertLocaleToLang()
    1389             : {
                           // Let the impl convert locale to language ID with on-the-fly IDs
                           // allowed, then copy the impl's state back into this instance.
    1390           0 :     getImpl()->convertLocaleToLang( true);
    1391           0 :     syncFromImpl();
    1392           0 : }
    1393             : 
    1394             : 
    1395      224823 : void LanguageTagImpl::convertBcp47ToLocale()
    1396             : {
                           // Fill maLocale from the BCP47 tag and mark it as initialized.
                           // If isIsoLocale() is true, Language and Country are taken from
                           // the tag and Variant is emptied. Otherwise Language is set to
                           // I18NLANGTAG_QLT, Country to getCountry() and Variant to the full
                           // maBcp47 string.
    1397      224823 :     bool bIso = isIsoLocale();
    1398      224823 :     if (bIso)
    1399             :     {
    1400      200436 :         maLocale.Language = getLanguageFromLangtag();
    1401      200436 :         maLocale.Country = getRegionFromLangtag();
    1402      200436 :         maLocale.Variant = OUString();
    1403             :     }
    1404             :     else
    1405             :     {
    1406       24387 :         maLocale.Language = I18NLANGTAG_QLT;
    1407       24387 :         maLocale.Country = getCountry();
    1408       24387 :         maLocale.Variant = maBcp47;
    1409             :     }
    1410      224823 :     mbInitializedLocale = true;
    1411      224823 : }
    1412             : 
    1413             : 
    1414           0 : void LanguageTag::convertBcp47ToLocale()
    1415             : {
                           // Let the impl derive the locale from the BCP47 string, then copy
                           // the impl's state back into this instance.
    1416           0 :     getImpl()->convertBcp47ToLocale();
    1417           0 :     syncFromImpl();
    1418           0 : }
    1419             : 
    1420             : 
    1421         330 : void LanguageTagImpl::convertBcp47ToLang()
    1422             : {
                           // Derive mnLangID from the BCP47 tag and mark it as initialized.
                           // A system locale resolves to the real system language; otherwise
                           // the conversion goes through maLocale (derived first if needed)
                           // with on-the-fly IDs allowed.
    1423         330 :     if (mbSystemLocale)
    1424             :     {
    1425           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1426             :     }
    1427             :     else
    1428             :     {
    1429         330 :         if (!mbInitializedLocale)
    1430           0 :             convertBcp47ToLocale();
    1431         330 :         convertLocaleToLang( true);
    1432             :     }
    1433         330 :     mbInitializedLangID = true;
    1434         330 : }
    1435             : 
    1436             : 
    1437           0 : void LanguageTag::convertBcp47ToLang()
    1438             : {
                           // Let the impl derive the language ID from the BCP47 string, then
                           // copy the impl's state back into this instance.
    1439           0 :     getImpl()->convertBcp47ToLang();
    1440           0 :     syncFromImpl();
    1441           0 : }
    1442             : 
    1443             : 
    1444       10758 : void LanguageTagImpl::convertLangToLocale()
    1445             : {
                           // Derive maLocale from mnLangID and mark the locale as initialized.
                           // For a system locale without an initialized ID, the real system
                           // language is determined first and the ID marked as initialized.
    1446       10758 :     if (mbSystemLocale && !mbInitializedLangID)
    1447             :     {
    1448           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1449           0 :         mbInitializedLangID = true;
    1450             :     }
    1451             :     // Resolve system here! The original is remembered as mbSystemLocale.
    1452       10758 :     maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
    1453       10758 :     mbInitializedLocale = true;
    1454       10758 : }
    1455             : 
    1456             : 
    1457           0 : void LanguageTag::convertLangToLocale()
    1458             : {
                           // Let the impl derive the locale from the language ID, then copy
                           // the impl's state back into this instance.
    1459           0 :     getImpl()->convertLangToLocale();
    1460           0 :     syncFromImpl();
    1461           0 : }
    1462             : 
    1463             : 
    1464         296 : void LanguageTagImpl::convertLangToBcp47()
    1465             : {
                           // Derive maBcp47 from mnLangID by way of maLocale: the locale is
                           // derived first if not initialized, then converted to BCP47.
                           // convertLocaleToBcp47() already sets mbInitializedBcp47; it is
                           // set here again.
    1466         296 :     if (!mbInitializedLocale)
    1467         296 :         convertLangToLocale();
    1468         296 :     convertLocaleToBcp47();
    1469         296 :     mbInitializedBcp47 = true;
    1470         296 : }
    1471             : 
    1472             : 
    1473         306 : void LanguageTag::convertFromRtlLocale()
    1474             : {
                           // If maLocale carries a non-empty Variant, rebuild a
                           // "language_countryvariant" byte string from it, convert that to a
                           // language ID, and then clear maLocale so that only the ID remains
                           // initialized. With an empty Variant nothing is changed.
    1475             :     // The rtl_Locale follows the Open Group Base Specification,
    1476             :     // 8.2 Internationalization Variables
    1477             :     // language[_territory][.codeset][@modifier]
    1478             :     // On GNU/Linux systems usually being glibc locales.
    1479             :     // sal/osl/unx/nlsupport.c _parse_locale() parses them into
    1480             :     // Language: language               2 or 3 alpha code
    1481             :     // Country: [territory]             2 alpha code
    1482             :     // Variant: [.codeset][@modifier]
    1483             :     // Variant effectively contains anything that follows the territory, not
    1484             :     // looking for '.' dot delimiter or '@' modifier content.
    1485         306 :     if (!maLocale.Variant.isEmpty())
    1486             :     {
    1487         612 :         OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
    1488         306 :                 RTL_TEXTENCODING_UTF8);
    1489             :         /* FIXME: let liblangtag parse this entirely with
    1490             :          * lt_tag_convert_from_locale() but that needs a patch to pass the
    1491             :          * string. */
    1492             : #if 0
    1493             :         myLtError aError;
    1494             :         theDataRef::get().incRef();
    1495             :         mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
    1496             :         maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
    1497             :         mbInitializedBcp47 = true;
    1498             : #else
    1499         306 :         mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
    1500         306 :         if (mnLangID == LANGUAGE_DONTKNOW)
    1501             :         {
    1502             :             SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
    1503           0 :             mnLangID = LANGUAGE_ENGLISH_US;     // we need _something_ here
    1504             :         }
    1505         306 :         mbInitializedLangID = true;
    1506             : #endif
                               // The language ID is authoritative from here on; drop the raw
                               // locale and mark it as not initialized.
    1507         306 :         maLocale = lang::Locale();
    1508         306 :         mbInitializedLocale = false;
    1509             :     }
    1510         306 : }
    1511             : 
    1512             : 
    1513      267562 : const OUString & LanguageTagImpl::getBcp47() const
    1514             : {
                           // Return maBcp47, deriving it first if it is not initialized yet:
                           // from the locale when that is initialized, otherwise from the
                           // language ID. The derivation mutates this object, hence the
                           // const_cast in this const method.
    1515      267562 :     if (!mbInitializedBcp47)
    1516             :     {
    1517         296 :         if (mbInitializedLocale)
    1518           0 :             const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
    1519             :         else
    1520         296 :             const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
    1521             :     }
    1522      267562 :     return maBcp47;
    1523             : }
    1524             : 
    1525             : 
    1526      487559 : const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
    1527             : {
                           // Return the BCP47 string of this tag.
                           //
                           // bResolveSystem: if false and this tag is the system locale, the
                           // shared theEmptyBcp47 string is returned instead of a resolved
                           // tag.
                           //
                           // Otherwise the string is first looked for in the impl's state
                           // and, if still not initialized, obtained from the impl's
                           // getBcp47() and synced back.
    1528      487559 :     if (!bResolveSystem && mbSystemLocale)
    1529       12531 :         return theEmptyBcp47::get();
    1530      475028 :     if (!mbInitializedBcp47)
    1531      402241 :         syncVarsFromImpl();
    1532      475028 :     if (!mbInitializedBcp47)
    1533             :     {
    1534         278 :         getImpl()->getBcp47();
    1535         278 :         const_cast<LanguageTag*>(this)->syncFromImpl();
    1536             :     }
    1537      475028 :     return maBcp47;
    1538             : }
    1539             : 
    1540             : 
    1541      411581 : OUString LanguageTagImpl::getLanguageFromLangtag()
    1542             : {
                           // Return the language subtag of this tag, or an empty string if
                           // maBcp47 is empty or no language can be obtained.
                           // synCanonicalize() runs first. With an lt_tag_t present the value
                           // comes from liblangtag; otherwise from the cached value, which is
                           // filled by cacheSimpleLSCV() if not cached yet.
    1543      411581 :     OUString aLanguage;
    1544      411581 :     synCanonicalize();
    1545      411581 :     if (maBcp47.isEmpty())
    1546           0 :         return aLanguage;
    1547      411581 :     if (mpImplLangtag)
    1548             :     {
    1549          34 :         const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
    1550             :         SAL_WARN_IF( !pLangT, "i18nlangtag",
    1551             :                 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
    1552          34 :         if (!pLangT)
    1553          22 :             return aLanguage;
    1554          12 :         const char* pLang = lt_lang_get_tag( pLangT);
    1555             :         SAL_WARN_IF( !pLang, "i18nlangtag",
    1556             :                 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
    1557          12 :         if (pLang)
    1558          12 :             aLanguage = OUString::createFromAscii( pLang);
    1559             :     }
    1560             :     else
    1561             :     {
    1562      411547 :         if (mbCachedLanguage || cacheSimpleLSCV())
    1563      411543 :             aLanguage = maCachedLanguage;
    1564             :     }
    1565      411559 :     return aLanguage;
    1566             : }
    1567             : 
    1568             : 
    1569          32 : OUString LanguageTagImpl::getScriptFromLangtag()
    1570             : {
                           // Return the script subtag of this tag, or an empty string if
                           // maBcp47 is empty or no script is present.
                           // synCanonicalize() runs first. With an lt_tag_t present the value
                           // comes from liblangtag; otherwise from the cached value, which is
                           // filled by cacheSimpleLSCV() if not cached yet.
    1571          32 :     OUString aScript;
    1572          32 :     synCanonicalize();
    1573          32 :     if (maBcp47.isEmpty())
    1574           0 :         return aScript;
    1575          32 :     if (mpImplLangtag)
    1576             :     {
    1577          30 :         const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
    1578             :         // pScriptT==NULL is valid for default scripts
    1579          30 :         if (!pScriptT)
    1580          30 :             return aScript;
    1581           0 :         const char* pScript = lt_script_get_tag( pScriptT);
    1582             :         SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
    1583           0 :         if (pScript)
    1584           0 :             aScript = OUString::createFromAscii( pScript);
    1585             :     }
    1586             :     else
    1587             :     {
    1588           2 :         if (mbCachedScript || cacheSimpleLSCV())
    1589           2 :             aScript = maCachedScript;
    1590             :     }
    1591           2 :     return aScript;
    1592             : }
    1593             : 
    1594             : 
    1595      434030 : OUString LanguageTagImpl::getRegionFromLangtag()
    1596             : {
                           // Return the region subtag of this tag, or an empty string if
                           // maBcp47 is empty or no region is present.
                           // synCanonicalize() runs first. With an lt_tag_t present the value
                           // comes from liblangtag; otherwise from the cached country, which
                           // is filled by cacheSimpleLSCV() if not cached yet.
    1597      434030 :     OUString aRegion;
    1598      434030 :     synCanonicalize();
    1599      434030 :     if (maBcp47.isEmpty())
    1600           0 :         return aRegion;
    1601      434030 :     if (mpImplLangtag)
    1602             :     {
    1603          50 :         const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
    1604             :         // pRegionT==NULL is valid for language only tags, rough check here
    1605             :         // that does not take sophisticated tags into account that actually
    1606             :         // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
    1607             :         // that ll-CC and lll-CC actually fail.
                               // The lengths 2, 3, 7 and 8 tested below are the string
                               // lengths of those four region-less forms.
    1608             :         SAL_WARN_IF( !pRegionT &&
    1609             :                 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
    1610             :                 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
    1611             :                 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
    1612          50 :         if (!pRegionT)
    1613          44 :             return aRegion;
    1614           6 :         const char* pRegion = lt_region_get_tag( pRegionT);
    1615             :         SAL_WARN_IF( !pRegion, "i18nlangtag",
    1616             :                 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
    1617           6 :         if (pRegion)
    1618           6 :             aRegion = OUString::createFromAscii( pRegion);
    1619             :     }
    1620             :     else
    1621             :     {
    1622      433980 :         if (mbCachedCountry || cacheSimpleLSCV())
    1623      433974 :             aRegion = maCachedCountry;
    1624             :     }
    1625      433986 :     return aRegion;
    1626             : }
    1627             : 
    1628             : 
    1629           2 : OUString LanguageTagImpl::getVariantsFromLangtag()
    1630             : {
                           // Return the variant subtags of this tag joined with '-', or an
                           // empty string if maBcp47 is empty or there are no variants.
                           // synCanonicalize() runs first. With an lt_tag_t present the
                           // variants come from liblangtag's variant list; otherwise from
                           // the cached value, filled by cacheSimpleLSCV() if not cached yet.
    1631           2 :     OUString aVariants;
    1632           2 :     synCanonicalize();
    1633           2 :     if (maBcp47.isEmpty())
    1634           0 :         return aVariants;
    1635           2 :     if (mpImplLangtag)
    1636             :     {
    1637           2 :         const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
    1638           4 :         for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
    1639             :         {
    1640           2 :             const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
    1641           2 :             if (pVariantT)
    1642             :             {
    1643           2 :                 const char* p = lt_variant_get_tag( pVariantT);
    1644           2 :                 if (p)
    1645             :                 {
                                           // The first variant is taken as is; each further
                                           // one is appended after a '-' separator.
    1646           2 :                     if (aVariants.isEmpty())
    1647           2 :                         aVariants = OUString::createFromAscii( p);
    1648             :                     else
    1649           0 :                         aVariants += "-" + OUString::createFromAscii( p);
    1650             :                 }
    1651             :             }
    1652             :         }
    1653             :     }
    1654             :     else
    1655             :     {
    1656           0 :         if (mbCachedVariants || cacheSimpleLSCV())
    1657           0 :             aVariants = maCachedVariants;
    1658             :     }
    1659           2 :     return aVariants;
    1660             : }
    1661             : 
    1662             : 
    1663     4739789 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
    1664             : {
    1665     4739789 :     if (!bResolveSystem && mbSystemLocale)
    1666         900 :         return theEmptyLocale::get();
    1667     4738889 :     if (!mbInitializedLocale)
    1668      719401 :         syncVarsFromImpl();
    1669     4738889 :     if (!mbInitializedLocale)
    1670             :     {
    1671           0 :         if (mbInitializedBcp47)
    1672           0 :             const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
    1673             :         else
    1674           0 :             const_cast<LanguageTag*>(this)->convertLangToLocale();
    1675             :     }
    1676     4738889 :     return maLocale;
    1677             : }
    1678             : 
    1679             : 
    1680    18155968 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
    1681             : {
    1682    18155968 :     if (!bResolveSystem && mbSystemLocale)
    1683     1268172 :         return LANGUAGE_SYSTEM;
    1684    16887796 :     if (!mbInitializedLangID)
    1685      632156 :         syncVarsFromImpl();
    1686    16887796 :     if (!mbInitializedLangID)
    1687             :     {
    1688           0 :         if (mbInitializedBcp47)
    1689           0 :             const_cast<LanguageTag*>(this)->convertBcp47ToLang();
    1690             :         else
    1691             :         {
    1692           0 :             const_cast<LanguageTag*>(this)->convertLocaleToLang();
    1693             : 
    1694             :             /* Resolve a locale only unknown due to some redundant information,
    1695             :              * like 'de-Latn-DE' with script tag. Never call canonicalize()
    1696             :              * from within convert...() methods due to possible recursion, so
    1697             :              * do it here. */
    1698           0 :             if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
    1699           0 :                 const_cast<LanguageTag*>(this)->synCanonicalize();
    1700             :         }
    1701             :     }
    1702    16887796 :     return mnLangID;
    1703             : }
    1704             : 
    1705             : 
    1706           0 : void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
    1707             : {
    1708             :     // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
    1709             :     // and getCountry() to work correctly in this context.
    1710           0 :     if (isIsoODF())
    1711             :     {
    1712           0 :         rLanguage = getLanguage();
    1713           0 :         rScript   = getScript();
    1714           0 :         rCountry  = getCountry();
    1715             :     }
    1716             :     else
    1717             :     {
    1718           0 :         rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
    1719           0 :         rScript   = (LanguageTag::isIsoScript(   getScript())   ? getScript()   : OUString());
    1720           0 :         rCountry  = (LanguageTag::isIsoCountry(  getCountry())  ? getCountry()  : OUString());
    1721             :     }
    1722           0 : }
    1723             : 
    1724             : 
    1725             : namespace
    1726             : {
    1727             : 
    1728      480963 : inline bool isLowerAscii( sal_Unicode c )
    1729             : {
    1730      480963 :     return 'a' <= c && c <= 'z';
    1731             : }
    1732             : 
    1733      405756 : inline bool isUpperAscii( sal_Unicode c )
    1734             : {
    1735      405756 :     return 'A' <= c && c <= 'Z';
    1736             : }
    1737             : 
    1738             : }
    1739             : 
    1740             : 
    1741             : // static
    1742      209227 : bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
    1743             : {
    1744             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1745             :     bool b2chars;
    1746      690258 :     if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
    1747      836770 :             isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
    1748       62577 :             (b2chars || isLowerAscii( rLanguage[2])))
    1749      209181 :         return true;
    1750             :     SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
    1751             :                 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
    1752             :             (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
    1753             :             "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
    1754          46 :     return false;
    1755             : }
    1756             : 
    1757             : 
    1758             : // static
    1759      233594 : bool LanguageTag::isIsoCountry( const OUString& rRegion )
    1760             : {
    1761             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1762      670062 :     if (rRegion.isEmpty() ||
    1763      405754 :             (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
    1764      233588 :         return true;
    1765             :     SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
    1766             :             "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
    1767           6 :     return false;
    1768             : }
    1769             : 
    1770             : 
    1771             : // static
    1772        7548 : bool LanguageTag::isIsoScript( const OUString& rScript )
    1773             : {
    1774             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1775       15104 :     if (rScript.isEmpty() ||
    1776          16 :             (rScript.getLength() == 4 &&
    1777          24 :              isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
    1778          16 :              isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
    1779        7548 :         return true;
    1780             :     SAL_WARN_IF( rScript.getLength() == 4 &&
    1781             :             (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
    1782             :              isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
    1783             :             "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
    1784           0 :     return false;
    1785             : }
    1786             : 
    1787             : 
    1788      219498 : OUString LanguageTagImpl::getLanguage() const
    1789             : {
    1790      219498 :     if (!mbCachedLanguage)
    1791             :     {
    1792      211145 :         maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
    1793      211145 :         mbCachedLanguage = true;
    1794             :     }
    1795      219498 :     return maCachedLanguage;
    1796             : }
    1797             : 
    1798             : 
    1799     2816694 : OUString LanguageTag::getLanguage() const
    1800             : {
    1801     2816694 :     ImplPtr pImpl = getImpl();
    1802     2816694 :     if (pImpl->mbCachedLanguage)
    1803     2806427 :         return pImpl->maCachedLanguage;
    1804       20534 :     OUString aRet( pImpl->getLanguage());
    1805       10267 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1806     2826961 :     return aRet;
    1807             : }
    1808             : 
    1809             : 
    1810        7548 : OUString LanguageTagImpl::getScript() const
    1811             : {
    1812        7548 :     if (!mbCachedScript)
    1813             :     {
    1814          32 :         maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
    1815          32 :         mbCachedScript = true;
    1816             :     }
    1817        7548 :     return maCachedScript;
    1818             : }
    1819             : 
    1820             : 
    1821       47781 : OUString LanguageTag::getScript() const
    1822             : {
    1823       47781 :     ImplPtr pImpl = getImpl();
    1824       47781 :     if (pImpl->mbCachedScript)
    1825       47781 :         return pImpl->maCachedScript;
    1826           0 :     OUString aRet( pImpl->getScript());
    1827           0 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1828       47781 :     return aRet;
    1829             : }
    1830             : 
    1831             : 
    1832       10914 : OUString LanguageTag::getLanguageAndScript() const
    1833             : {
    1834       10914 :     OUString aLanguageScript( getLanguage());
    1835       21828 :     OUString aScript( getScript());
    1836       10914 :     if (!aScript.isEmpty())
    1837             :     {
    1838           6 :         aLanguageScript += "-" + aScript;
    1839             :     }
    1840       21828 :     return aLanguageScript;
    1841             : }
    1842             : 
    1843             : 
    1844       32125 : OUString LanguageTagImpl::getCountry() const
    1845             : {
    1846       32125 :     if (!mbCachedCountry)
    1847             :     {
    1848       24413 :         maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
    1849       24413 :         if (!LanguageTag::isIsoCountry( maCachedCountry))
    1850           2 :             maCachedCountry = OUString();
    1851       24413 :         mbCachedCountry = true;
    1852             :     }
    1853       32125 :     return maCachedCountry;
    1854             : }
    1855             : 
    1856             : 
    1857      622481 : OUString LanguageTag::getCountry() const
    1858             : {
    1859      622481 :     ImplPtr pImpl = getImpl();
    1860      622481 :     if (pImpl->mbCachedCountry)
    1861      622459 :         return pImpl->maCachedCountry;
    1862          44 :     OUString aRet( pImpl->getCountry());
    1863          22 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1864      622503 :     return aRet;
    1865             : }
    1866             : 
    1867             : 
    1868      209181 : OUString LanguageTagImpl::getRegion() const
    1869             : {
    1870      209181 :     return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
    1871             : }
    1872             : 
    1873             : 
    1874          10 : OUString LanguageTagImpl::getVariants() const
    1875             : {
    1876          10 :     if (!mbCachedVariants)
    1877             :     {
    1878           2 :         maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
    1879           2 :         mbCachedVariants = true;
    1880             :     }
    1881          10 :     return maCachedVariants;
    1882             : }
    1883             : 
    1884             : 
    1885       39068 : OUString LanguageTag::getVariants() const
    1886             : {
    1887       39068 :     ImplPtr pImpl = getImpl();
    1888       39068 :     if (pImpl->mbCachedVariants)
    1889       39068 :         return pImpl->maCachedVariants;
    1890           0 :     OUString aRet( pImpl->getVariants());
    1891           0 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1892       39068 :     return aRet;
    1893             : }
    1894             : 
    1895             : 
    1896           0 : OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
    1897             : {
    1898           0 :     OUString aRet;
    1899           0 :     if (isIsoLocale())
    1900             :     {
    1901           0 :         OUString aCountry( getCountry());
    1902           0 :         if (aCountry.isEmpty())
    1903           0 :             aRet = getLanguage() + rEncoding;
    1904             :         else
    1905           0 :             aRet = getLanguage() + "_" + aCountry + rEncoding;
    1906             :     }
    1907             :     else
    1908             :     {
    1909             :         /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
    1910             :          * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
    1911             :          * So far no code was prepared for anything else than a simple
    1912             :          * language_country locale so we don't lose anything here right now.
    1913             :          * */
    1914             :     }
    1915           0 :     return aRet;
    1916             : }
    1917             : 
    1918             : 
    1919       40670 : bool LanguageTagImpl::hasScript() const
    1920             : {
    1921       40670 :     if (!mbCachedScript)
    1922           0 :         getScript();
    1923       40670 :     return !maCachedScript.isEmpty();
    1924             : }
    1925             : 
    1926             : 
    1927       40670 : bool LanguageTag::hasScript() const
    1928             : {
    1929       40670 :     bool bRet = getImpl()->hasScript();
    1930       40670 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1931       40670 :     return bRet;
    1932             : }
    1933             : 
    1934             : 
    1935      235164 : bool LanguageTagImpl::cacheSimpleLSCV()
    1936             : {
    1937      470328 :     OUString aLanguage, aScript, aCountry, aVariants;
    1938      235164 :     Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
    1939      235164 :     bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
    1940      235164 :     if (bRet)
    1941             :     {
    1942      235154 :         maCachedLanguage = aLanguage;
    1943      235154 :         maCachedScript   = aScript;
    1944      235154 :         maCachedCountry  = aCountry;
    1945      235154 :         maCachedVariants = aVariants;
    1946      235154 :         mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
    1947             :     }
    1948      470328 :     return bRet;
    1949             : }
    1950             : 
    1951             : 
    1952      790817 : bool LanguageTagImpl::isIsoLocale() const
    1953             : {
    1954      790817 :     if (meIsIsoLocale == DECISION_DONTKNOW)
    1955             :     {
    1956      233554 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    1957             :         // It must be at most ll-CC or lll-CC
    1958             :         // Do not use getCountry() here, use getRegion() instead.
    1959      467108 :         meIsIsoLocale = ((maBcp47.isEmpty() ||
    1960     1303832 :                     (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
    1961     1118974 :                      LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
    1962             :     }
    1963      790817 :     return meIsIsoLocale == DECISION_YES;
    1964             : }
    1965             : 
    1966             : 
    1967      558456 : bool LanguageTag::isIsoLocale() const
    1968             : {
    1969      558456 :     bool bRet = getImpl()->isIsoLocale();
    1970      558456 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1971      558456 :     return bRet;
    1972             : }
    1973             : 
    1974             : 
    1975        8212 : bool LanguageTagImpl::isIsoODF() const
    1976             : {
    1977        8212 :     if (meIsIsoODF == DECISION_DONTKNOW)
    1978             :     {
    1979        7538 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    1980        7538 :         if (!LanguageTag::isIsoScript( getScript()))
    1981             :         {
    1982           0 :             meIsIsoODF = DECISION_NO;
    1983           0 :             return false;
    1984             :         }
    1985             :         // The usual case is lll-CC so simply check that first.
    1986        7538 :         if (isIsoLocale())
    1987             :         {
    1988        7500 :             meIsIsoODF = DECISION_YES;
    1989        7500 :             return true;
    1990             :         }
    1991             :         // If this is not ISO locale for which script must not exist it can
    1992             :         // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
    1993             :         // ll-vvvvvvvv
    1994         168 :         meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
    1995          92 :                     LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
    1996         104 :                     getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
    1997             :     }
    1998         712 :     return meIsIsoODF == DECISION_YES;
    1999             : }
    2000             : 
    2001             : 
    2002         716 : bool LanguageTag::isIsoODF() const
    2003             : {
    2004         716 :     bool bRet = getImpl()->isIsoODF();
    2005         716 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    2006         716 :     return bRet;
    2007             : }
    2008             : 
    2009             : 
    2010       10832 : bool LanguageTagImpl::isValidBcp47() const
    2011             : {
    2012       10832 :     if (meIsValid == DECISION_DONTKNOW)
    2013             :     {
    2014           0 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    2015             :         SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
    2016             :                 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
    2017             :     }
    2018       10832 :     return meIsValid == DECISION_YES;
    2019             : }
    2020             : 
    2021             : 
    2022        3108 : bool LanguageTag::isValidBcp47() const
    2023             : {
    2024        3108 :     bool bRet = getImpl()->isValidBcp47();
    2025        3108 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    2026        3108 :     return bRet;
    2027             : }
    2028             : 
    2029             : 
    2030             : 
    2031             : 
    2032        5597 : LanguageTag & LanguageTag::makeFallback()
    2033             : {
    2034        5597 :     if (!mbIsFallback)
    2035             :     {
    2036        5597 :         const lang::Locale& rLocale1 = getLocale( true);
    2037        5597 :         lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
    2038       16791 :         if (    rLocale1.Language != aLocale2.Language ||
    2039       10530 :                 rLocale1.Country  != aLocale2.Country ||
    2040        4933 :                 rLocale1.Variant  != aLocale2.Variant)
    2041             :         {
    2042         664 :             if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
    2043             :             {
    2044             :                 // "en-US" is the last resort fallback, try if we get a better
    2045             :                 // one for the fallback hierarchy of a non-"en" locale.
    2046           0 :                 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
    2047           0 :                 for (::std::vector< OUString >::const_iterator it( aFallbacks.begin()); it != aFallbacks.end(); ++it)
    2048             :                 {
    2049           0 :                     lang::Locale aLocale3( LanguageTag( *it).getLocale());
    2050           0 :                     aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
    2051           0 :                     if (aLocale2.Language != "en" || aLocale2.Country != "US")
    2052           0 :                         break;  // for, success
    2053           0 :                 }
    2054             :             }
    2055             :             SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
    2056             :                     rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
    2057             :                     aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
    2058         664 :             reset( aLocale2);
    2059             :         }
    2060        5597 :         mbIsFallback = true;
    2061             :     }
    2062        5597 :     return *this;
    2063             : }
    2064             : 
    2065             : 
    2066             : /* TODO: maybe this now could take advantage of the mnOverride field in
    2067             :  * isolang.cxx entries and search for kSAME instead of hardcoded special
    2068             :  * fallbacks. Though iterating through those tables would be slower and even
    2069             :  * then there would be some special cases, but we wouldn't lack entries that
    2070             :  * were missed out. */
    2071      335768 : ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
    2072             : {
    2073      335768 :     ::std::vector< OUString > aVec;
    2074      671536 :     OUString aLanguage( getLanguage());
    2075      671536 :     OUString aCountry( getCountry());
    2076      335768 :     if (isIsoLocale())
    2077             :     {
    2078      296704 :         if (!aCountry.isEmpty())
    2079             :         {
    2080      243588 :             if (bIncludeFullBcp47)
    2081      215032 :                 aVec.push_back( aLanguage + "-" + aCountry);
    2082      243588 :             if (aLanguage == "zh")
    2083             :             {
    2084             :                 // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
    2085             :                 // list zh-CN.
    2086        2646 :                 if (aCountry == "HK" || aCountry == "MO")
    2087         882 :                     aVec.push_back( aLanguage + "-TW");
    2088        1764 :                 else if (aCountry != "CN")
    2089         882 :                     aVec.push_back( aLanguage + "-CN");
    2090        2646 :                 aVec.push_back( aLanguage);
    2091             :             }
    2092      240942 :             else if (aLanguage == "sh")
    2093             :             {
    2094             :                 // Manual list instead of calling
    2095             :                 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
    2096             :                 // that would also include "sh-*" again.
    2097           0 :                 aVec.push_back( "sr-Latn-" + aCountry);
    2098           0 :                 aVec.push_back( "sr-Latn");
    2099           0 :                 aVec.push_back( "sh");  // legacy with script, before default script with country
    2100           0 :                 aVec.push_back( "sr-" + aCountry);
    2101           0 :                 aVec.push_back( "sr");
    2102             :             }
    2103      240942 :             else if (aLanguage == "ca" && aCountry == "XV")
    2104             :             {
    2105           0 :                 ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
    2106           0 :                 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
    2107             :                 // Already includes 'ca' language fallback.
    2108             :             }
    2109      240942 :             else if (aLanguage == "ku")
    2110             :             {
    2111           0 :                 if (aCountry == "TR" || aCountry == "SY")
    2112             :                 {
    2113           0 :                     aVec.push_back( "kmr-Latn-" + aCountry);
    2114           0 :                     aVec.push_back( "kmr-" + aCountry);
    2115           0 :                     aVec.push_back( "kmr-Latn");
    2116           0 :                     aVec.push_back( "kmr");
    2117           0 :                     aVec.push_back( aLanguage);
    2118             :                 }
    2119           0 :                 else if (aCountry == "IQ" || aCountry == "IR")
    2120             :                 {
    2121           0 :                     aVec.push_back( "ckb-" + aCountry);
    2122           0 :                     aVec.push_back( "ckb");
    2123             :                 }
    2124             :             }
    2125      240942 :             else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
    2126             :             {
    2127           0 :                 aVec.push_back( "ku-Latn-" + aCountry);
    2128           0 :                 aVec.push_back( "ku-" + aCountry);
    2129           0 :                 aVec.push_back( aLanguage);
    2130           0 :                 aVec.push_back( "ku");
    2131             :             }
    2132      240942 :             else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
    2133             :             {
    2134        2646 :                 aVec.push_back( "ku-Arab-" + aCountry);
    2135        2646 :                 aVec.push_back( "ku-" + aCountry);
    2136        2646 :                 aVec.push_back( aLanguage);
    2137             :                 // not 'ku' only, that was used for Latin script
    2138             :             }
    2139             :             else
    2140      238296 :                 aVec.push_back( aLanguage);
    2141             :         }
    2142             :         else
    2143             :         {
    2144       53116 :             if (bIncludeFullBcp47)
    2145       53112 :                 aVec.push_back( aLanguage);
    2146       53116 :             if (aLanguage == "sh")
    2147             :             {
    2148           0 :                 aVec.push_back( "sr-Latn");
    2149           0 :                 aVec.push_back( "sr");
    2150             :             }
    2151       53116 :             else if (aLanguage == "pli")
    2152             :             {
    2153             :                 // a special case for Pali dictionary, see fdo#41599
    2154           0 :                 aVec.push_back( "pi-Latn");
    2155           0 :                 aVec.push_back( "pi");
    2156             :             }
    2157             :         }
    2158      296704 :         return aVec;
    2159             :     }
    2160             : 
    2161       39064 :     getBcp47();     // have maBcp47 now
    2162       39064 :     if (bIncludeFullBcp47)
    2163       39064 :         aVec.push_back( maBcp47);
    2164       78128 :     OUString aScript;
    2165       78128 :     OUString aVariants( getVariants());
    2166       78128 :     OUString aTmp;
    2167       39064 :     if (hasScript())
    2168             :     {
    2169       36849 :         aScript = getScript();
    2170       36849 :         bool bHaveLanguageScriptVariant = false;
    2171       36849 :         if (!aCountry.isEmpty())
    2172             :         {
    2173       24854 :             if (!aVariants.isEmpty())
    2174             :             {
    2175           0 :                 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
    2176           0 :                 if (aTmp != maBcp47)
    2177           0 :                     aVec.push_back( aTmp);
    2178             :                 // Language with variant but without country before language
    2179             :                 // without variant but with country.
    2180           0 :                 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
    2181           0 :                 if (aTmp != maBcp47)
    2182           0 :                     aVec.push_back( aTmp);
    2183           0 :                 bHaveLanguageScriptVariant = true;
    2184             :             }
    2185       24854 :             aTmp = aLanguage + "-" + aScript + "-" + aCountry;
    2186       24854 :             if (aTmp != maBcp47)
    2187           0 :                 aVec.push_back( aTmp);
    2188       24854 :             if (aLanguage == "sr" && aScript == "Latn")
    2189             :             {
    2190             :                 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
    2191        6178 :                 if (aCountry == "CS")
    2192             :                 {
    2193        1347 :                     aVec.push_back( "sr-Latn-YU");
    2194        1347 :                     aVec.push_back( "sh-CS");
    2195        1347 :                     aVec.push_back( "sh-YU");
    2196             :                 }
    2197             :                 else
    2198        4831 :                     aVec.push_back( "sh-" + aCountry);
    2199             :             }
    2200       18676 :             else if (aLanguage == "pi" && aScript == "Latn")
    2201           0 :                 aVec.push_back( "pli");     // a special case for Pali dictionary, see fdo#41599
    2202       18676 :             else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
    2203           0 :                 aVec.push_back( "ku-" + aCountry);
    2204             :         }
    2205       36849 :         if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
    2206             :         {
    2207           0 :             aTmp = aLanguage + "-" + aScript + "-" + aVariants;
    2208           0 :             if (aTmp != maBcp47)
    2209           0 :                 aVec.push_back( aTmp);
    2210             :         }
    2211       36849 :         aTmp = aLanguage + "-" + aScript;
    2212       36849 :         if (aTmp != maBcp47)
    2213       24854 :             aVec.push_back( aTmp);
    2214             : 
    2215             :         // 'sh' actually denoted a script, so have it here instead of appended
    2216             :         // at the end as language-only.
    2217       36849 :         if (aLanguage == "sr" && aScript == "Latn")
    2218        7501 :             aVec.push_back( "sh");
    2219       29348 :         else if (aLanguage == "ku" && aScript == "Arab")
    2220           0 :             aVec.push_back( "ckb");
    2221             :         // 'ku' only denoted Latin script
    2222       29348 :         else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
    2223           0 :             aVec.push_back( "ku");
    2224             :     }
    2225       39064 :     bool bHaveLanguageVariant = false;
    2226       39064 :     if (!aCountry.isEmpty())
    2227             :     {
    2228       27065 :         if (!aVariants.isEmpty())
    2229             :         {
    2230        2211 :             aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
    2231        2211 :             if (aTmp != maBcp47)
    2232           0 :                 aVec.push_back( aTmp);
    2233        2211 :             if (maBcp47 == "ca-ES-valencia")
    2234        1768 :                 aVec.push_back( "ca-XV");
    2235             :             // Language with variant but without country before language
    2236             :             // without variant but with country.
    2237             :             // But only if variant is not from a grandfathered tag that
    2238             :             // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
    2239             :             // not.
    2240        4422 :             if (aVariants.getLength() >= 5 ||
    2241         443 :                     (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
    2242             :             {
    2243        1768 :                 aTmp = aLanguage + "-" + aVariants;
    2244        1768 :                 if (aTmp != maBcp47)
    2245        1768 :                     aVec.push_back( aTmp);
    2246        1768 :                 bHaveLanguageVariant = true;
    2247             :             }
    2248             :         }
    2249       27065 :         aTmp = aLanguage + "-" + aCountry;
    2250       27065 :         if (aTmp != maBcp47)
    2251       27065 :             aVec.push_back( aTmp);
    2252             :     }
    2253       39064 :     if (!aVariants.isEmpty() && !bHaveLanguageVariant)
    2254             :     {
    2255             :         // Only if variant is not from a grandfathered tag that wouldn't match
    2256             :         // the rules, i.e. "de-1901" is fine but "en-oed" is not.
    2257         896 :         if (aVariants.getLength() >= 5 ||
    2258         447 :                 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
    2259             :         {
    2260           4 :             aTmp = aLanguage + "-" + aVariants;
    2261           4 :             if (aTmp != maBcp47)
    2262           0 :                 aVec.push_back( aTmp);
    2263             :         }
    2264             :     }
    2265             : 
    2266             :     // Insert legacy fallbacks with country before language-only, but only
    2267             :     // default script, script was handled already above.
    2268       39064 :     if (!aCountry.isEmpty())
    2269             :     {
    2270       27065 :         if (aLanguage == "sr" && aCountry == "CS")
    2271        1347 :             aVec.push_back( "sr-YU");
    2272             :     }
    2273             : 
    2274             :     // Original language-only.
    2275       39064 :     if (aLanguage != maBcp47)
    2276       39064 :         aVec.push_back( aLanguage);
    2277             : 
    2278       39064 :     return aVec;
    2279             : }
    2280             : 
    2281             : // Equality test with optional resolution of SYSTEM; delegates to operator==() whenever resolving cannot change the result.
    2282           0 : bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
    2283             : {
    2284             :     // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
    2285             :     // can use the operator==() optimization.
    2286           0 :     if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
    2287           0 :         return operator==( rLanguageTag);
    2288             : 
    2289             :     // Compare full language tag strings. Reached only if bResolveSystem is set and exactly one of the two tags is SYSTEM.
    2290           0 :     return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
    2291             : }
    2292             : 
    2293             : // Equality with SYSTEM left unresolved: both SYSTEM, else same Lang-ID if both sides have one initialized, else same unresolved BCP 47 string.
    2294     2239517 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
    2295             : {
    2296     2239517 :     if (isSystemLocale() && rLanguageTag.isSystemLocale())
    2297       96194 :         return true;    // both SYSTEM
    2298             : 
    2299             :     // No need to convert to BCP47 if both Lang-IDs are available.
    2300     2143323 :     if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
    2301             :     {
    2302             :         // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
    2303     2107561 :         return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
    2304             :     }
    2305             : 
    2306             :     // Compare full language tag strings but SYSTEM unresolved.
    2307       35762 :     return getBcp47( false) == rLanguageTag.getBcp47( false);
    2308             : }
    2309             : 
    2310             : // Inequality, defined as the logical negation of operator==().
    2311     2231478 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
    2312             : {
    2313     2231478 :     return !operator==( rLanguageTag);
    2314             : }
    2315             : 
    2316             : // Ordering by ASCII-case-insensitive comparison of the two BCP 47 strings, both obtained via getBcp47( false).
    2317         108 : bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
    2318             : {
    2319         108 :     return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
    2320             : }
    2321             : 
    2322             : 
    2323             : // static -- Splits simple tags using only length, hyphen positions and a few character checks. Returns EXTRACTED_LSC (language[-script][-country]), EXTRACTED_LV (with variants), EXTRACTED_X, EXTRACTED_X_JOKER or EXTRACTED_NONE. Out parameters are filled for LSC/LV, cleared for NONE and left untouched for X and X_JOKER.
    2324      476080 : LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
    2325             :         OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
    2326             : {
    2327      476080 :     Extraction eRet = EXTRACTED_NONE;
    2328      476080 :     const sal_Int32 nLen = rBcp47.getLength();
    2329      476080 :     const sal_Int32 nHyph1 = rBcp47.indexOf( '-'); // positions of the first four hyphens, -1 where absent
    2330      476080 :     sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
    2331      476080 :     sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
    2332      476080 :     sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
    2333      476080 :     if (nLen == 1 && rBcp47[0] == '*')              // * the dreaded jolly joker
    2334             :     {
    2335             :         // It's f*d up but we need to recognize this.
    2336          12 :         eRet = EXTRACTED_X_JOKER;
    2337             :     }
    2338      476068 :     else if (nHyph1 == 1 && rBcp47[0] == 'x')       // x-... privateuse
    2339             :     {
    2340             :         // x-... privateuse tags MUST be known to us by definition.
    2341          26 :         eRet = EXTRACTED_X;
    2342             :     }
    2343      476042 :     else if (nLen == 2 || nLen == 3)                // ll or lll
    2344             :     {
    2345       85514 :         if (nHyph1 < 0) // a 2 or 3 character string containing a hyphen is not extracted
    2346             :         {
    2347       42757 :             rLanguage = rBcp47.toAsciiLowerCase();
    2348       42757 :             rScript = rCountry = rVariants = OUString();
    2349       42757 :             eRet = EXTRACTED_LSC;
    2350             :         }
    2351             :     }
    2352      433285 :     else if (  (nHyph1 == 2 && nLen == 5)           // ll-CC
    2353      169895 :             || (nHyph1 == 3 && nLen == 6))          // lll-CC
    2354             :     {
    2355      755768 :         if (nHyph2 < 0)
    2356             :         {
    2357      377884 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2358      377884 :             rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
    2359      377884 :             rScript = rVariants = OUString();
    2360      377884 :             eRet = EXTRACTED_LSC;
    2361             :         }
    2362             :     }
    2363       55401 :     else if (  (nHyph1 == 2 && nLen ==  7)          // ll-Ssss or ll-vvvv
    2364       38237 :             || (nHyph1 == 3 && nLen ==  8))         // lll-Ssss or lll-vvvv
    2365             :     {
    2366       20604 :         if (nHyph2 < 0)
    2367             :         {
    2368       20604 :             sal_Unicode c = rBcp47[nHyph1+1]; // first character after the hyphen decides variant vs. script
    2369       20604 :             if ('0' <= c && c <= '9')
    2370             :             {
    2371             :                 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
    2372           2 :                 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2373           2 :                 rScript   = rCountry = OUString();
    2374           2 :                 rVariants = rBcp47.copy( nHyph1 + 1);
    2375           2 :                 eRet = EXTRACTED_LV;
    2376             :             }
    2377             :             else
    2378             :             {
    2379       20602 :                 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2380       41204 :                 rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + // script is normalized to title case
    2381       61806 :                             rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2382       20602 :                 rCountry  = rVariants = OUString();
    2383       20602 :                 eRet = EXTRACTED_LSC;
    2384             :             }
    2385       20604 :         }
    2386             :     }
    2387       34797 :     else if (  (nHyph1 == 2 && nHyph2 == 7 && nLen == 10)   // ll-Ssss-CC
    2388       11905 :             || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11))  // lll-Ssss-CC
    2389             :     {
    2390       65830 :         if (nHyph3 < 0)
    2391             :         {
    2392       32915 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2393       32915 :             rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2394       32915 :             rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
    2395       32915 :             rVariants = OUString();
    2396       32915 :             eRet = EXTRACTED_LSC;
    2397             :         }
    2398             :     }
    2399        1882 :     else if (  (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15)   // ll-Ssss-CC-vvvv[vvvv][-...]
    2400        1882 :             || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16))  // lll-Ssss-CC-vvvv[vvvv][-...]
    2401             :     {
    2402           0 :         if (nHyph4 < 0)
    2403           0 :             nHyph4 = rBcp47.getLength();
    2404           0 :         if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9) // first subtag after the country is 4 to 8 characters long
    2405             :         {
    2406           0 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2407           0 :             rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2408           0 :             rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
    2409           0 :             rVariants = rBcp47.copy( nHyph3 + 1); // everything after the country, including any further subtags
    2410           0 :             eRet = EXTRACTED_LV;
    2411             :         }
    2412             :     }
    2413        1882 :     else if (  (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10)   // ll-CC-vvvv[vvvv][-...]
    2414         878 :             || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11))  // lll-CC-vvvv[vvvv][-...]
    2415             :     {
    2416        1004 :         if (nHyph3 < 0)
    2417        1004 :             nHyph3 = rBcp47.getLength();
    2418        2008 :         if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9) // first subtag after the country is 4 to 8 characters long
    2419             :         {
    2420        1004 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2421        1004 :             rScript   = OUString();
    2422        1004 :             rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
    2423        1004 :             rVariants = rBcp47.copy( nHyph2 + 1); // everything after the country, including any further subtags
    2424        1004 :             eRet = EXTRACTED_LV;
    2425             :         }
    2426             :     }
    2427         878 :     else if (  (nHyph1 == 2 && nLen >= 8)                   // ll-vvvvv[vvv][-...]
    2428           8 :             || (nHyph1 == 3 && nLen >= 9))                  // lll-vvvvv[vvv][-...]
    2429             :     {
    2430         870 :         if (nHyph2 < 0)
    2431          10 :             nHyph2 = rBcp47.getLength();
    2432         870 :         if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9) // first subtag after the language is 5 to 8 characters long
    2433             :         {
    2434           4 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2435           4 :             rScript   = rCountry = OUString();
    2436           4 :             rVariants = rBcp47.copy( nHyph1 + 1); // everything after the language, including any further subtags
    2437           4 :             eRet = EXTRACTED_LV;
    2438             :         }
    2439             :         else
    2440             :         {
    2441             :             // Known and handled grandfathered; ugly but effective ...
    2442             :             // Note that nLen must have matched above.
    2443             :             // Strictly not a variant, but so far we treat it as such.
    2444         866 :             if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
    2445             :             {
    2446         860 :                 rLanguage = "en";
    2447         860 :                 rScript   = OUString();
    2448         860 :                 rCountry  = "GB";
    2449         860 :                 rVariants = "oed";
    2450         860 :                 eRet = EXTRACTED_LV;
    2451             :             }
    2452             :         }
    2453             :     }
    2454      476080 :     if (eRet == EXTRACTED_NONE) // nothing matched: report it and hand back empty parts
    2455             :     {
    2456             :         SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
    2457          14 :         rLanguage = rScript = rCountry = rVariants = OUString();
    2458             :     }
    2459      476080 :     return eRet;
    2460             : }
    2461             : 
    2462             : 
    2463             : // static -- Picks the entry of rList that best matches rReference: an exact match, else the first hit for any candidate of an ordered fallback list, else rList.begin(). Returns rList.end() only for an empty list.
    2464       73896 : ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
    2465             :         const ::std::vector< OUString > & rList, const OUString & rReference )
    2466             : {
    2467       73896 :     if (rList.empty())
    2468        6954 :         return rList.end();
    2469             : 
    2470       66942 :     ::std::vector< OUString >::const_iterator it;
    2471             : 
    2472             :     // Try the simple case first without constructing fallbacks.
    2473       95448 :     for (it = rList.begin(); it != rList.end(); ++it)
    2474             :     {
    2475       66942 :         if (*it == rReference)
    2476       38436 :             return it;  // exact match
    2477             :     }
    2478             : 
    2479       28506 :     ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false)); // candidates in priority order, extended below
    2480       28506 :     if (rReference != "en-US") // for an en-US reference neither "en-US" nor "en" is appended here
    2481             :     {
    2482           0 :         aFallbacks.push_back( "en-US");
    2483           0 :         if (rReference != "en")
    2484           0 :             aFallbacks.push_back( "en");
    2485             :     }
    2486       28506 :     if (rReference != "x-default")
    2487       28506 :         aFallbacks.push_back( "x-default");
    2488       28506 :     if (rReference != "x-no-translate")
    2489       28506 :         aFallbacks.push_back( "x-no-translate");
    2490             :     /* TODO: the original comphelper::Locale::getFallback() code had
    2491             :      * "x-notranslate" instead of "x-no-translate", but all .xcu files use
    2492             :      * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
    2493             :      * Did that ever work? Was it supposed to work at all like this? */
    2494             : 
    2495      111588 :     for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb) // earlier candidates win over list order
    2496             :     {
    2497      167382 :         for (it = rList.begin(); it != rList.end(); ++it)
    2498             :         {
    2499       84300 :             if (*it == *fb)
    2500        1218 :                 return it;  // fallback found
    2501             :         }
    2502             :     }
    2503             : 
    2504             :     // Did not find anything so return something of the list, the first value
    2505             :     // will do as well as any other as none did match any of the possible
    2506             :     // fallbacks.
    2507       27288 :     return rList.begin();
    2508             : }
    2509             : 
    2510             : 
    2511             : // static -- Finds the entry of rList matching rReference: an exact Language/Country/Variant match, else the first entry whose own fallback strings contain one of the reference's fallback strings (reference order first, then list order). Returns rList.end() if the list is empty or nothing matches.
    2512           0 : ::std::vector< com::sun::star::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
    2513             :         const ::std::vector< com::sun::star::lang::Locale > & rList,
    2514             :         const com::sun::star::lang::Locale & rReference )
    2515             : {
    2516           0 :     if (rList.empty())
    2517           0 :         return rList.end();
    2518             : 
    2519           0 :     ::std::vector< lang::Locale >::const_iterator it;
    2520             : 
    2521             :     // Try the simple case first without constructing fallbacks.
    2522           0 :     for (it = rList.begin(); it != rList.end(); ++it)
    2523             :     {
    2524           0 :         if (    (*it).Language == rReference.Language &&
    2525           0 :                 (*it).Country  == rReference.Country  &&
    2526           0 :                 (*it).Variant  == rReference.Variant)
    2527           0 :             return it;  // exact match
    2528             :     }
    2529             : 
    2530             :     // Now for each reference fallback test the fallbacks of the list in order.
    2531           0 :     ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false)); // NOTE(review): false here vs. true for the list entries below; presumably controls inclusion of the full tag itself -- confirm against getFallbackStrings()
    2532           0 :     ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size()); // aListFallbacks[i] holds the fallback strings of rList[i]
    2533           0 :     size_t i = 0;
    2534           0 :     for (it = rList.begin(); it != rList.end(); ++it, ++i)
    2535             :     {
    2536           0 :         ::std::vector< OUString > aTmp( LanguageTag( *it).getFallbackStrings( true));
    2537           0 :         aListFallbacks[i] = aTmp;
    2538           0 :     }
    2539           0 :     for (::std::vector< OUString >::const_iterator rfb( aFallbacks.begin()); rfb != aFallbacks.end(); ++rfb)
    2540             :     {
    2541           0 :         for (::std::vector< ::std::vector< OUString > >::const_iterator lfb( aListFallbacks.begin());
    2542           0 :                 lfb != aListFallbacks.end(); ++lfb)
    2543             :         {
    2544           0 :             for (::std::vector< OUString >::const_iterator fb( (*lfb).begin()); fb != (*lfb).end(); ++fb)
    2545             :             {
    2546           0 :                 if (*rfb == *fb)
    2547           0 :                     return rList.begin() + (lfb - aListFallbacks.begin()); // map the index in aListFallbacks back to the same position in rList
    2548             :             }
    2549             :         }
    2550             :     }
    2551             : 
    2552             :     // No match found.
    2553           0 :     return rList.end();
    2554             : }
    2555             : 
    2556             : // Returns true for LANGUAGE_SYSTEM and for any ID that MsLangId::getRealLanguage() maps to a different ID.
    2557         408 : static bool lcl_isSystem( LanguageType nLangID )
    2558             : {
    2559         408 :     if (nLangID == LANGUAGE_SYSTEM)
    2560         342 :         return true;
    2561             :     // There are some special values that simplify to SYSTEM,
    2562             :     // getRealLanguage() catches and resolves them.
    2563          66 :     LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
    2564          66 :     if (nNewLangID != nLangID)
    2565           0 :         return true;
    2566          66 :     return false;
    2567             : }
    2568             : 
    2569             : 
    2570             : // static -- Converts a Lang-ID to a Locale; a SYSTEM ID with bResolveSystem false yields an empty Locale without constructing a LanguageTag.
    2571      316284 : com::sun::star::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
    2572             : {
    2573      316284 :     if (!bResolveSystem && lcl_isSystem( nLangID))
    2574         342 :         return lang::Locale();
    2575             : 
    2576      315942 :     return LanguageTag( nLangID).getLocale( bResolveSystem);
    2577             : }
    2578             : 
    2579             : 
    2580             : // static -- Converts a Locale to a Lang-ID; an empty Language with bResolveSystem false yields LANGUAGE_SYSTEM without constructing a LanguageTag.
    2581      630059 : LanguageType LanguageTag::convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
    2582             : {
    2583      630059 :     if (rLocale.Language.isEmpty() && !bResolveSystem)
    2584       42522 :         return LANGUAGE_SYSTEM;
    2585             : 
    2586      587537 :     return LanguageTag( rLocale).getLanguageType( bResolveSystem);
    2587             : }
    2588             : 
    2589             : 
    2590             : // static -- Builds a BCP 47 string from a Locale: empty for an empty Language, the Variant field if Language is I18NLANGTAG_QLT, otherwise Language or Language-Country.
    2591      911942 : OUString LanguageTagImpl::convertToBcp47( const com::sun::star::lang::Locale& rLocale )
    2592             : {
    2593      911942 :     OUString aBcp47;
    2594      911942 :     if (rLocale.Language.isEmpty())
    2595             :     {
    2596             :         // aBcp47 stays empty
    2597             :     }
    2598      911942 :     else if (rLocale.Language == I18NLANGTAG_QLT)
    2599             :     {
    2600       45352 :         aBcp47 = rLocale.Variant; // the Variant field is taken verbatim as the tag
    2601             :     }
    2602             :     else
    2603             :     {
    2604             :         /* XXX NOTE: most legacy code never evaluated the Variant field, so for
    2605             :          * now just concatenate language and country. In case we stumbled over
    2606             :          * variant aware code we'd have to take care of that. */
    2607      866590 :         if (rLocale.Country.isEmpty())
    2608      120646 :             aBcp47 = rLocale.Language;
    2609             :         else
    2610             :         {
    2611      745944 :             aBcp47 = rLocale.Language + "-" + rLocale.Country;
    2612             :         }
    2613             :     }
    2614      911942 :     return aBcp47;
    2615             : }
    2616             : 
    2617             : 
    2618             : // static -- Locale to BCP 47 string; an empty Language gives the tag for LANGUAGE_SYSTEM if bResolveSystem is set, else an empty string. Everything else is delegated to LanguageTagImpl::convertToBcp47().
    2619       15564 : OUString LanguageTag::convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
    2620             : {
    2621       15564 :     OUString aBcp47;
    2622       15564 :     if (rLocale.Language.isEmpty())
    2623             :     {
    2624          30 :         if (bResolveSystem)
    2625           8 :             aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM, true);
    2626             :         // else aBcp47 stays empty
    2627             :     }
    2628             :     else
    2629             :     {
    2630       15534 :         aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
    2631             :     }
    2632       15564 :     return aBcp47;
    2633             : }
    2634             : 
    2635             : 
    2636             : // static -- Lang-ID to BCP 47 string via convertToLocale(); returns an empty string for a SYSTEM ID with bResolveSystem false, or if resolving was requested but produced an empty Language.
    2637         600 : OUString LanguageTag::convertToBcp47( LanguageType nLangID, bool bResolveSystem )
    2638             : {
    2639             :     // Catch this first so we don't need the rest.
    2640         600 :     if (!bResolveSystem && lcl_isSystem( nLangID))
    2641           0 :         return OUString();
    2642             : 
    2643         600 :     lang::Locale aLocale( LanguageTag::convertToLocale( nLangID, bResolveSystem));
    2644             :     // If system for some reason (should not happen.. haha) could not be
    2645             :     // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
    2646             :     // would recurse into this method here!
    2647         600 :     if (aLocale.Language.isEmpty() && bResolveSystem)
    2648           0 :         return OUString();      // bad luck, bail out
    2649         600 :     return LanguageTagImpl::convertToBcp47( aLocale);
    2650             : }
    2651             : 
    2652             : 
    2653             : // static -- Converts a BCP 47 string to a Locale; an empty string with bResolveSystem false yields an empty Locale without constructing a LanguageTag.
    2654       80824 : com::sun::star::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
    2655             : {
    2656       80824 :     if (rBcp47.isEmpty() && !bResolveSystem)
    2657           0 :         return lang::Locale();
    2658             : 
    2659       80824 :     return LanguageTag( rBcp47).getLocale( bResolveSystem);
    2660             : }
    2661             : 
    2662             : 
    2663             : // static -- Converts a BCP 47 string to a Lang-ID; an empty string with bResolveSystem false yields LANGUAGE_SYSTEM without constructing a LanguageTag.
    2664        2968 : LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47, bool bResolveSystem )
    2665             : {
    2666        2968 :     if (rBcp47.isEmpty() && !bResolveSystem)
    2667           0 :         return LANGUAGE_SYSTEM;
    2668             : 
    2669        2968 :     return LanguageTag( rBcp47).getLanguageType( bResolveSystem);
    2670             : }
    2671             : 
    2672             : 
    2673             : // static -- Constructs a LanguageTag from rBcp47, applies makeFallback() and returns its Lang-ID from getLanguageType( true).
    2674        4571 : LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
    2675             : {
    2676        4571 :     return LanguageTag( rBcp47).makeFallback().getLanguageType( true);
    2677             : }
    2678             : 
    2679             : 
    2680             : // static -- Constructs a LanguageTag from rBcp47, applies makeFallback() and returns its Locale from getLocale( true).
    2681           0 : com::sun::star::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
    2682             : {
    2683           0 :     return LanguageTag( rBcp47).makeFallback().getLocale( true);
    2684             : }
    2685             : 
    2686             : 
    2687             : // static -- Parses and canonicalizes rString with the lt_tag_*() functions and returns whether that succeeded. With bDisallowPrivate, a tag with a non-empty privateuse part or with language I18NLANGTAG_QLT is reported invalid. If o_pCanonicalized is non-null and canonicalization succeeded, the canonical form is stored there even when the result is false.
    2688          18 : bool LanguageTag::isValidBcp47( const OUString& rString, OUString* o_pCanonicalized, bool bDisallowPrivate )
    2689             : {
    2690          18 :     bool bValid = false;
    2691             : 
    2692             :     struct guard // ties the theDataRef reference count and the lt_tag_t object to this function's scope
    2693             :     {
    2694             :         lt_tag_t* mpLangtag;
    2695          18 :         guard()
    2696             :         {
    2697          18 :             theDataRef::get().incRef();
    2698          18 :             mpLangtag = lt_tag_new();
    2699          18 :         }
    2700          18 :         ~guard()
    2701             :         {
    2702          18 :             lt_tag_unref( mpLangtag);
    2703          18 :             theDataRef::get().decRef();
    2704          18 :         }
    2705          18 :     } aVar;
    2706             : 
    2707          36 :     myLtError aError;
    2708             : 
    2709          18 :     if (lt_tag_parse( aVar.mpLangtag, OUStringToOString( rString, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    2710             :     {
    2711          16 :         char* pTag = lt_tag_canonicalize( aVar.mpLangtag, &aError.p);
    2712             :         SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTag:isValidBcp47: could not canonicalize '" << rString << "'");
    2713          16 :         if (pTag)
    2714             :         {
    2715          16 :             bValid = true;
    2716          16 :             if (bDisallowPrivate)
    2717             :             {
    2718           6 :                 const lt_string_t* pPrivate = lt_tag_get_privateuse( aVar.mpLangtag);
    2719           6 :                 if (pPrivate && lt_string_length( pPrivate) > 0)
    2720           2 :                     bValid = false;
    2721             :                 else
    2722             :                 {
    2723           4 :                     const lt_lang_t* pLangT = lt_tag_get_language( aVar.mpLangtag);
    2724           4 :                     if (pLangT)
    2725             :                     {
    2726           4 :                         const char* pLang = lt_lang_get_tag( pLangT);
    2727           4 :                         if (pLang && strcmp( pLang, I18NLANGTAG_QLT) == 0)
    2728             :                         {
    2729             :                             // Disallow 'qlt' privateuse code to prevent
    2730             :                             // confusion with our internal usage.
    2731           0 :                             bValid = false;
    2732             :                         }
    2733             :                     }
    2734             :                 }
    2735             :             }
    2736          16 :             if (o_pCanonicalized)
    2737          16 :                 *o_pCanonicalized = OUString::createFromAscii( pTag);
    2738          16 :             free( pTag); // the canonicalized string is released here with free()
    2739          16 :             return bValid;
    2740             :         }
    2741             :     }
    2742             :     else
    2743             :     {
    2744             :         SAL_INFO( "i18nlangtag", "LanguageTag:isValidBcp47: could not parse '" << rString << "'");
    2745             :     }
    2746          20 :     return bValid; // reached only if parsing or canonicalization failed, so bValid is still false
    2747             : }
    2748             : 
    2749        8039 : LanguageTag makeLanguageTagFromAppleLanguageId(AppleLanguageId nLanguage)
    2750             : {
    2751             :     //map the simple ones via LanguageTypes, and the hard ones explicitly
    2752        8039 :     LanguageType nLang(LANGUAGE_DONTKNOW);
    2753             : 
    2754        8039 :     switch (nLanguage)
    2755             :     {
    2756             :         case APPLE_LANG_ENGLISH:
    2757        8039 :             nLang = LANGUAGE_ENGLISH;
    2758        8039 :             break;
    2759             :         case APPLE_LANG_FRENCH:
    2760           0 :             nLang = LANGUAGE_FRENCH;
    2761           0 :             break;
    2762             :         case APPLE_LANG_GERMAN:
    2763           0 :             nLang = LANGUAGE_GERMAN;
    2764           0 :             break;
    2765             :         case APPLE_LANG_ITALIAN:
    2766           0 :             nLang = LANGUAGE_ITALIAN;
    2767           0 :             break;
    2768             :         case APPLE_LANG_DUTCH:
    2769           0 :             nLang = LANGUAGE_DUTCH;
    2770           0 :             break;
    2771             :         case APPLE_LANG_SWEDISH:
    2772           0 :             nLang = LANGUAGE_SWEDISH;
    2773           0 :             break;
    2774             :         case APPLE_LANG_SPANISH:
    2775           0 :             nLang = LANGUAGE_SPANISH;
    2776           0 :             break;
    2777             :         case APPLE_LANG_DANISH:
    2778           0 :             nLang = LANGUAGE_DANISH;
    2779           0 :             break;
    2780             :         case APPLE_LANG_PORTUGUESE:
    2781           0 :             nLang = LANGUAGE_PORTUGUESE;
    2782           0 :             break;
    2783             :         case APPLE_LANG_NORWEGIAN:
    2784           0 :             nLang = LANGUAGE_NORWEGIAN;
    2785           0 :             break;
    2786             :         case APPLE_LANG_HEBREW:
    2787           0 :             nLang = LANGUAGE_HEBREW;
    2788           0 :             break;
    2789             :         case APPLE_LANG_JAPANESE:
    2790           0 :             nLang = LANGUAGE_JAPANESE;
    2791           0 :             break;
    2792             :         case APPLE_LANG_ARABIC:
    2793           0 :             nLang = LANGUAGE_ARABIC_PRIMARY_ONLY;
    2794           0 :             break;
    2795             :         case APPLE_LANG_FINNISH:
    2796           0 :             nLang = LANGUAGE_FINNISH;
    2797           0 :             break;
    2798             :         case APPLE_LANG_GREEK:
    2799           0 :             nLang = LANGUAGE_GREEK;
    2800           0 :             break;
    2801             :         case APPLE_LANG_ICELANDIC:
    2802           0 :             nLang = LANGUAGE_ICELANDIC;
    2803           0 :             break;
    2804             :         case APPLE_LANG_MALTESE:
    2805           0 :             nLang = LANGUAGE_MALTESE;
    2806           0 :             break;
    2807             :         case APPLE_LANG_TURKISH:
    2808           0 :             nLang = LANGUAGE_TURKISH;
    2809           0 :             break;
    2810             :         case APPLE_LANG_CROATIAN:
    2811           0 :             nLang = LANGUAGE_CROATIAN;
    2812           0 :             break;
    2813             :         case APPLE_LANG_CHINESE_TRADITIONAL:
    2814           0 :             nLang = LANGUAGE_CHINESE_TRADITIONAL;
    2815           0 :             break;
    2816             :         case APPLE_LANG_URDU:
    2817           0 :             nLang = LANGUAGE_URDU_PAKISTAN; //probably, otherwise we need a LANGUAGE_URDU_PRIMARY_ONLY
    2818           0 :             break;
    2819             :         case APPLE_LANG_HINDI:
    2820           0 :             nLang = LANGUAGE_HINDI;
    2821           0 :             break;
    2822             :         case APPLE_LANG_THAI:
    2823           0 :             nLang = LANGUAGE_THAI;
    2824           0 :             break;
    2825             :         case APPLE_LANG_KOREAN:
    2826           0 :             nLang = LANGUAGE_KOREAN;
    2827           0 :             break;
    2828             :         case APPLE_LANG_LITHUANIAN:
    2829           0 :             nLang = LANGUAGE_LITHUANIAN;
    2830           0 :             break;
    2831             :         case APPLE_LANG_POLISH:
    2832           0 :             nLang = LANGUAGE_POLISH;
    2833           0 :             break;
    2834             :         case APPLE_LANG_HUNGARIAN:
    2835           0 :             nLang = LANGUAGE_HUNGARIAN;
    2836           0 :             break;
    2837             :         case APPLE_LANG_ESTONIAN:
    2838           0 :             nLang = LANGUAGE_ESTONIAN;
    2839           0 :             break;
    2840             :         case APPLE_LANG_LATVIAN:
    2841           0 :             nLang = LANGUAGE_LATVIAN;
    2842           0 :             break;
    2843             :         case APPLE_LANG_SAMI:
    2844           0 :             nLang = LANGUAGE_SAMI_NORTHERN_NORWAY; //maybe
    2845           0 :             break;
    2846             :         case APPLE_LANG_FAROESE:
    2847           0 :             nLang = LANGUAGE_FAEROESE;
    2848           0 :             break;
    2849             :         case APPLE_LANG_FARSI:
    2850           0 :             nLang = LANGUAGE_FARSI;
    2851           0 :             break;
    2852             :         case APPLE_LANG_RUSSIAN:
    2853           0 :             nLang = LANGUAGE_RUSSIAN;
    2854           0 :             break;
    2855             :         case APPLE_LANG_CHINESE_SIMPLIFIED:
    2856           0 :             nLang = LANGUAGE_CHINESE_SIMPLIFIED;
    2857           0 :             break;
    2858             :         case APPLE_LANG_FLEMISH:
    2859           0 :             nLang = LANGUAGE_DUTCH_BELGIAN;
    2860           0 :             break;
    2861             :         case APPLE_LANG_IRISH_GAELIC:
    2862           0 :             nLang = LANGUAGE_GAELIC_IRELAND;
    2863           0 :             break;
    2864             :         case APPLE_LANG_ALBANIAN:
    2865           0 :             nLang = LANGUAGE_ALBANIAN;
    2866           0 :             break;
    2867             :         case APPLE_LANG_ROMANIAN:
    2868           0 :             nLang = LANGUAGE_ROMANIAN;
    2869           0 :             break;
    2870             :         case APPLE_LANG_CZECH:
    2871           0 :             nLang = LANGUAGE_CZECH;
    2872           0 :             break;
    2873             :         case APPLE_LANG_SLOVAK:
    2874           0 :             nLang = LANGUAGE_SLOVAK;
    2875           0 :             break;
    2876             :         case APPLE_LANG_SLOVENIAN:
    2877           0 :             nLang = LANGUAGE_SLOVENIAN;
    2878           0 :             break;
    2879             :         case APPLE_LANG_YIDDISH:
    2880           0 :             nLang = LANGUAGE_YIDDISH;
    2881           0 :             break;
    2882             :         case APPLE_LANG_SERBIAN:
    2883           0 :             nLang = LANGUAGE_SERBIAN_CYRILLIC_SERBIA;   //maybe
    2884           0 :             break;
    2885             :         case APPLE_LANG_MACEDONIAN:
    2886           0 :             nLang = LANGUAGE_MACEDONIAN;
    2887           0 :             break;
    2888             :         case APPLE_LANG_BULGARIAN:
    2889           0 :             nLang = LANGUAGE_BULGARIAN;
    2890           0 :             break;
    2891             :         case APPLE_LANG_UKRAINIAN:
    2892           0 :             nLang = LANGUAGE_UKRAINIAN;
    2893           0 :             break;
    2894             :         case APPLE_LANG_BYELORUSSIAN:
    2895           0 :             nLang = LANGUAGE_BELARUSIAN;
    2896           0 :             break;
    2897             :         case APPLE_LANG_UZBEK:
    2898           0 :             nLang = LANGUAGE_UZBEK_CYRILLIC; //maybe
    2899           0 :             break;
    2900             :         case APPLE_LANG_KAZAKH:
    2901           0 :             nLang = LANGUAGE_KAZAKH;
    2902           0 :             break;
    2903             :         case APPLE_LANG_AZERI_CYRILLIC:
    2904           0 :             nLang = LANGUAGE_AZERI_CYRILLIC;
    2905           0 :             break;
    2906             :         case APPLE_LANG_AZERI_ARABIC:
    2907           0 :             return LanguageTag("az-Arab");
    2908             :             break;
    2909             :         case APPLE_LANG_ARMENIAN:
    2910           0 :             nLang = LANGUAGE_ARMENIAN;
    2911           0 :             break;
    2912             :         case APPLE_LANG_GEORGIAN:
    2913           0 :             nLang = LANGUAGE_GEORGIAN;
    2914           0 :             break;
    2915             :         case APPLE_LANG_MOLDAVIAN:
    2916           0 :             nLang = LANGUAGE_ROMANIAN_MOLDOVA;
    2917           0 :             break;
    2918             :         case APPLE_LANG_KIRGHIZ:
    2919           0 :             nLang = LANGUAGE_KIRGHIZ;
    2920           0 :             break;
    2921             :         case APPLE_LANG_TAJIKI:
    2922           0 :             nLang = LANGUAGE_TAJIK;
    2923           0 :             break;
    2924             :         case APPLE_LANG_TURKMEN:
    2925           0 :             nLang = LANGUAGE_TURKMEN;
    2926           0 :             break;
    2927             :         case APPLE_LANG_MONGOLIAN_MONGOLIAN:
    2928           0 :             nLang = LANGUAGE_MONGOLIAN_MONGOLIAN_MONGOLIA;
    2929           0 :             break;
    2930             :         case APPLE_LANG_MONGOLIAN_CYRILLIC:
    2931           0 :             nLang = LANGUAGE_MONGOLIAN_CYRILLIC_MONGOLIA;
    2932           0 :             break;
    2933             :         case APPLE_LANG_PASHTO:
    2934           0 :             nLang = LANGUAGE_PASHTO;
    2935           0 :             break;
    2936             :         case APPLE_LANG_KURDISH:
    2937           0 :             nLang = LANGUAGE_USER_KURDISH_TURKEY; //maybe
    2938           0 :             break;
    2939             :         case APPLE_LANG_KASHMIRI:
    2940           0 :             nLang = LANGUAGE_KASHMIRI;
    2941           0 :             break;
    2942             :         case APPLE_LANG_SINDHI:
    2943           0 :             nLang = LANGUAGE_SINDHI;
    2944           0 :             break;
    2945             :         case APPLE_LANG_TIBETAN:
    2946           0 :             nLang = LANGUAGE_TIBETAN;
    2947           0 :             break;
    2948             :         case APPLE_LANG_NEPALI:
    2949           0 :             nLang = LANGUAGE_NEPALI;
    2950           0 :             break;
    2951             :         case APPLE_LANG_SANSKRIT:
    2952           0 :             nLang = LANGUAGE_SANSKRIT;
    2953           0 :             break;
    2954             :         case APPLE_LANG_MARATHI:
    2955           0 :             nLang = LANGUAGE_MARATHI;
    2956           0 :             break;
    2957             :         case APPLE_LANG_BENGALI:
    2958           0 :             nLang = LANGUAGE_BENGALI;
    2959           0 :             break;
    2960             :         case APPLE_LANG_ASSAMESE:
    2961           0 :             nLang = LANGUAGE_ASSAMESE;
    2962           0 :             break;
    2963             :         case APPLE_LANG_GUJARATI:
    2964           0 :             nLang = LANGUAGE_GUJARATI;
    2965           0 :             break;
    2966             :         case APPLE_LANG_PUNJABI:
    2967           0 :             nLang = LANGUAGE_PUNJABI;
    2968           0 :             break;
    2969             :         case APPLE_LANG_ORIYA:
    2970           0 :             nLang = LANGUAGE_ODIA;
    2971           0 :             break;
    2972             :         case APPLE_LANG_MALAYALAM:
    2973           0 :             nLang = LANGUAGE_MALAYALAM;
    2974           0 :             break;
    2975             :         case APPLE_LANG_KANNADA:
    2976           0 :             nLang = LANGUAGE_KANNADA;
    2977           0 :             break;
    2978             :         case APPLE_LANG_TAMIL:
    2979           0 :             nLang = LANGUAGE_TAMIL;
    2980           0 :             break;
    2981             :         case APPLE_LANG_TELUGU:
    2982           0 :             nLang = LANGUAGE_TELUGU;
    2983           0 :             break;
    2984             :         case APPLE_LANG_SINHALESE:
    2985           0 :             nLang = LANGUAGE_SINHALESE_SRI_LANKA;
    2986           0 :             break;
    2987             :         case APPLE_LANG_BURMESE:
    2988           0 :             nLang = LANGUAGE_BURMESE;
    2989           0 :             break;
    2990             :         case APPLE_LANG_KHMER:
    2991           0 :             nLang = LANGUAGE_KHMER;
    2992           0 :             break;
    2993             :         case APPLE_LANG_LAO:
    2994           0 :             nLang = LANGUAGE_LAO;
    2995           0 :             break;
    2996             :         case APPLE_LANG_VIETNAMESE:
    2997           0 :             nLang = LANGUAGE_VIETNAMESE;
    2998           0 :             break;
    2999             :         case APPLE_LANG_INDONESIAN:
    3000           0 :             nLang = LANGUAGE_INDONESIAN;
    3001           0 :             break;
    3002             :         case APPLE_LANG_TAGALONG:
    3003           0 :             nLang = LANGUAGE_USER_TAGALOG;
    3004           0 :             break;
    3005             :         case APPLE_LANG_MALAY_LATIN:
    3006           0 :             nLang = LANGUAGE_MALAY_MALAYSIA;
    3007           0 :             break;
    3008             :         case APPLE_LANG_MALAY_ARABIC:
    3009           0 :             return LanguageTag("ms-Arab");
    3010             :             break;
    3011             :         case APPLE_LANG_AMHARIC:
    3012           0 :             nLang = LANGUAGE_AMHARIC_ETHIOPIA;
    3013           0 :             break;
    3014             :         case APPLE_LANG_TIGRINYA:
    3015           0 :             nLang = LANGUAGE_TIGRIGNA_ETHIOPIA;
    3016           0 :             break;
    3017             :         case APPLE_LANG_GALLA:
    3018           0 :             nLang = LANGUAGE_OROMO;
    3019           0 :             break;
    3020             :         case APPLE_LANG_SOMALI:
    3021           0 :             nLang = LANGUAGE_SOMALI;
    3022           0 :             break;
    3023             :         case APPLE_LANG_SWAHILI:
    3024           0 :             nLang = LANGUAGE_SWAHILI;
    3025           0 :             break;
    3026             :         case APPLE_LANG_KINYARWANDA:
    3027           0 :             nLang = LANGUAGE_KINYARWANDA_RWANDA;
    3028           0 :             break;
    3029             :         case APPLE_LANG_RUNDI:
    3030           0 :             return LanguageTag("rn");
    3031             :             break;
    3032             :         case APPLE_LANG_NYANJA:
    3033           0 :             nLang = LANGUAGE_USER_NYANJA;
    3034           0 :             break;
    3035             :         case APPLE_LANG_MALAGASY:
    3036           0 :             nLang = LANGUAGE_MALAGASY_PLATEAU;
    3037           0 :             break;
    3038             :         case APPLE_LANG_ESPERANTO:
    3039           0 :             nLang = LANGUAGE_USER_ESPERANTO;
    3040           0 :             break;
    3041             :         case APPLE_LANG_WELSH:
    3042           0 :             nLang = LANGUAGE_WELSH;
    3043           0 :             break;
    3044             :         case APPLE_LANG_BASQUE:
    3045           0 :             nLang = LANGUAGE_BASQUE;
    3046           0 :             break;
    3047             :         case APPLE_LANG_CATALAN:
    3048           0 :             nLang = LANGUAGE_CATALAN;
    3049           0 :             break;
    3050             :         case APPLE_LANG_LATIN:
    3051           0 :             nLang = LANGUAGE_USER_LATIN;
    3052           0 :             break;
    3053             :         case APPLE_LANG_QUENCHUA:
    3054           0 :             nLang = LANGUAGE_QUECHUA_BOLIVIA; //maybe
    3055           0 :             break;
    3056             :         case APPLE_LANG_GUARANI:
    3057           0 :             nLang = LANGUAGE_GUARANI_PARAGUAY;
    3058           0 :             break;
    3059             :         case APPLE_LANG_AYMARA:
    3060           0 :             return LanguageTag("ay");
    3061             :             break;
    3062             :         case APPLE_LANG_TATAR:
    3063           0 :             nLang = LANGUAGE_TATAR;
    3064           0 :             break;
    3065             :         case APPLE_LANG_UIGHUR:
    3066           0 :             nLang = LANGUAGE_UIGHUR_CHINA;
    3067           0 :             break;
    3068             :         case APPLE_LANG_DZONGKHA:
    3069           0 :             nLang = LANGUAGE_DZONGKHA;
    3070           0 :             break;
    3071             :         case APPLE_LANG_JAVANESE_LATIN:
    3072           0 :             return LanguageTag("jv-Latn");
    3073             :             break;
    3074             :         case APPLE_LANG_SUNDANESE_LATIN:
    3075           0 :             return LanguageTag("su-Latn");
    3076             :             break;
    3077             :         case APPLE_LANG_GALICIAN:
    3078           0 :             nLang = LANGUAGE_GALICIAN;
    3079           0 :             break;
    3080             :         case APPLE_LANG_AFRIKAANS:
    3081           0 :             nLang = LANGUAGE_AFRIKAANS;
    3082           0 :             break;
    3083             :         case APPLE_LANG_BRETON:
    3084           0 :             nLang = LANGUAGE_BRETON_FRANCE;
    3085           0 :             break;
    3086             :         case APPLE_LANG_INUKTITUT:
    3087           0 :             nLang = LANGUAGE_INUKTITUT_LATIN_CANADA; //probably
    3088           0 :             break;
    3089             :         case APPLE_LANG_SCOTTISH_GAELIC:
    3090           0 :             nLang = LANGUAGE_GAELIC_SCOTLAND;
    3091           0 :             break;
    3092             :         case APPLE_LANG_MANX_GAELIC:
    3093           0 :             nLang = LANGUAGE_USER_MANX;
    3094           0 :             break;
    3095             :         case APPLE_LANG_IRISH_GAELIC_WITH_DOT_ABOVE:
    3096           0 :             return LanguageTag("ga-Latg");
    3097             :             break;
    3098             :         case APPLE_LANG_TONGAN:
    3099           0 :             return LanguageTag("to");
    3100             :             break;
    3101             :         case APPLE_LANG_GREEK_POLYTONIC:
    3102           0 :             nLang = LANGUAGE_USER_ANCIENT_GREEK;
    3103           0 :             break;
    3104             :         case APPLE_LANG_GREENLANDIC:
    3105           0 :             nLang = LANGUAGE_KALAALLISUT_GREENLAND;
    3106           0 :             break;
    3107             :         case APPLE_LANG_AZERI_LATIN:
    3108           0 :             nLang = LANGUAGE_AZERI_LATIN;
    3109           0 :             break;
    3110             :     }
    3111             : 
    3112        8039 :     return LanguageTag(nLang);
    3113             : }
    3114             : 
    3115             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10