LCOV - code coverage report
Current view: top level - i18nlangtag/source/languagetag - languagetag.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 904 1146 78.9 %
Date: 2014-04-11 Functions: 94 106 88.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #include <config_folders.h>
      11             : 
      12             : #include "i18nlangtag/languagetag.hxx"
      13             : #include "i18nlangtag/mslangid.hxx"
      14             : #include <rtl/ustrbuf.hxx>
      15             : #include <rtl/bootstrap.hxx>
      16             : #include <osl/file.hxx>
      17             : #include <rtl/instance.hxx>
      18             : #include <rtl/locale.h>
      19             : #include <boost/unordered_set.hpp>
      20             : #include <map>
      21             : 
      22             : //#define erDEBUG
      23             : 
      24             : #if defined(ENABLE_LIBLANGTAG)
      25             : #include <liblangtag/langtag.h>
      26             : #else
      27             : /* Replacement code for LGPL phobic and Android systems.
      28             :  * For iOS we could probably use NSLocale instead, that should have more or
      29             :  * less required functionality. If it is good enough, it could be used for Mac
      30             :  * OS X, too.
      31             :  */
      32             : #include "simple-langtag.cxx"
      33             : #endif
      34             : 
      35             : using namespace com::sun::star;
      36             : 
      37             : 
      38             : // Helper to ensure lt_error_t is free'd
      39             : struct myLtError
      40             : {
      41             :     lt_error_t* p;
      42          13 :     myLtError() : p(NULL) {}
      43          13 :     ~myLtError() { if (p) lt_error_unref( p); }
      44             : };
      45             : 
      46             : // "statics" to be returned as const reference to an empty locale and string.
      47             : namespace {
      48             : struct theEmptyLocale : public rtl::Static< lang::Locale, theEmptyLocale > {};
      49             : struct theEmptyBcp47 : public rtl::Static< OUString, theEmptyBcp47 > {};
      50             : }
      51             : 
      52             : typedef ::boost::unordered_set< OUString, OUStringHash > KnownTagSet;
      53             : namespace {
      54             : struct theKnowns : public rtl::Static< KnownTagSet, theKnowns > {};
      55             : struct theMutex : public rtl::Static< osl::Mutex, theMutex > {};
      56             : }
      57             : 
      58           8 : static const KnownTagSet & getKnowns()
      59             : {
      60           8 :     KnownTagSet & rKnowns = theKnowns::get();
      61           8 :     if (rKnowns.empty())
      62             :     {
      63           5 :         osl::MutexGuard aGuard( theMutex::get());
      64           5 :         if (rKnowns.empty())
      65             :         {
      66           5 :             ::std::vector< MsLangId::LanguagetagMapping > aDefined( MsLangId::getDefinedLanguagetags());
      67        8250 :             for (::std::vector< MsLangId::LanguagetagMapping >::const_iterator it( aDefined.begin());
      68        5500 :                     it != aDefined.end(); ++it)
      69             :             {
      70             :                 // Do not use the BCP47 string here to initialize the
      71             :                 // LanguageTag because then canonicalize() would call this
      72             :                 // getKnowns() again..
      73        2745 :                 ::std::vector< OUString > aFallbacks( LanguageTag( (*it).mnLang).getFallbackStrings( true));
      74        9075 :                 for (::std::vector< OUString >::const_iterator fb( aFallbacks.begin()); fb != aFallbacks.end(); ++fb)
      75             :                 {
      76        6330 :                     rKnowns.insert( *fb);
      77             :                 }
      78        2750 :             }
      79           5 :         }
      80             :     }
      81           8 :     return rKnowns;
      82             : }
      83             : 
      84             : 
      85             : namespace {
      86             : struct compareIgnoreAsciiCaseLess
      87             : {
      88     7564475 :     bool operator()( const OUString& r1, const OUString& r2 ) const
      89             :     {
      90     7564475 :         return r1.compareToIgnoreAsciiCase( r2) < 0;
      91             :     }
      92             : };
      93             : typedef ::std::map< OUString, LanguageTag::ImplPtr, compareIgnoreAsciiCaseLess > MapBcp47;
      94             : typedef ::std::map< LanguageType, LanguageTag::ImplPtr > MapLangID;
      95             : struct theMapBcp47 : public rtl::Static< MapBcp47, theMapBcp47 > {};
      96             : struct theMapLangID : public rtl::Static< MapLangID, theMapLangID > {};
      97             : struct theDontKnow : public rtl::Static< LanguageTag::ImplPtr, theDontKnow > {};
      98             : struct theSystemLocale : public rtl::Static< LanguageTag::ImplPtr, theSystemLocale > {};
      99             : }
     100             : 
     101             : 
     102          94 : static LanguageType getNextOnTheFlyLanguage()
     103             : {
     104             :     static LanguageType nOnTheFlyLanguage = 0;
     105          94 :     osl::MutexGuard aGuard( theMutex::get());
     106          94 :     if (!nOnTheFlyLanguage)
     107          26 :         nOnTheFlyLanguage = MsLangId::makeLangID( LANGUAGE_ON_THE_FLY_SUB_START, LANGUAGE_ON_THE_FLY_START);
     108             :     else
     109             :     {
     110          68 :         if (MsLangId::getPrimaryLanguage( nOnTheFlyLanguage) != LANGUAGE_ON_THE_FLY_END)
     111          68 :             ++nOnTheFlyLanguage;
     112             :         else
     113             :         {
     114           0 :             LanguageType nSub = MsLangId::getSubLanguage( nOnTheFlyLanguage);
     115           0 :             if (nSub != LANGUAGE_ON_THE_FLY_SUB_END)
     116           0 :                 nOnTheFlyLanguage = MsLangId::makeLangID( ++nSub, LANGUAGE_ON_THE_FLY_START);
     117             :             else
     118             :             {
     119             :                 SAL_WARN( "i18nlangtag", "getNextOnTheFlyLanguage: none left! ("
     120             :                         << ((LANGUAGE_ON_THE_FLY_END - LANGUAGE_ON_THE_FLY_START + 1)
     121             :                             * (LANGUAGE_ON_THE_FLY_SUB_END - LANGUAGE_ON_THE_FLY_SUB_START + 1))
     122             :                         << " consumed?!?)");
     123           0 :                 return 0;
     124             :             }
     125             :         }
     126             :     }
     127             : #if OSL_DEBUG_LEVEL > 0
     128             :     static size_t nOnTheFlies = 0;
     129             :     ++nOnTheFlies;
     130             :     SAL_INFO( "i18nlangtag", "getNextOnTheFlyLanguage: number " << nOnTheFlies);
     131             : #endif
     132          94 :     return nOnTheFlyLanguage;
     133             : }
     134             : 
     135             : 
     136             : // static
     137      222599 : bool LanguageTag::isOnTheFlyID( LanguageType nLang )
     138             : {
     139      222599 :     LanguageType nPri = MsLangId::getPrimaryLanguage( nLang);
     140      222599 :     LanguageType nSub = MsLangId::getSubLanguage( nLang);
     141             :     return
     142        1005 :         LANGUAGE_ON_THE_FLY_START <= nPri && nPri <= LANGUAGE_ON_THE_FLY_END &&
     143      223604 :         LANGUAGE_ON_THE_FLY_SUB_START <= nSub && nSub <= LANGUAGE_ON_THE_FLY_SUB_END;
     144             : }
     145             : 
     146             : 
     147             : /** A reference holder for liblangtag data de/initialization, one static
     148             :     instance. Currently implemented such that the first "ref" inits and dtor
     149             :     (our library deinitialized) tears down.
     150             : */
     151             : class LiblantagDataRef
     152             : {
     153             : public:
     154             :     LiblantagDataRef();
     155             :     ~LiblantagDataRef();
     156          11 :     inline void incRef()
     157             :     {
     158          11 :         if (mnRef != SAL_MAX_UINT32 && !mnRef++)
     159           5 :             setup();
     160          11 :     }
     161          16 :     inline void decRef()
     162             :     {
     163          16 :         if (mnRef != SAL_MAX_UINT32 && mnRef && !--mnRef)
     164           5 :             teardown();
     165          16 :     }
     166             : private:
     167             :     OString maDataPath;   // path to liblangtag data, "|" if system
     168             :     sal_uInt32   mnRef;
     169             : 
     170             :     void setupDataPath();
     171             :     void setup();
     172             :     void teardown();
     173             : };
     174             : 
     175             : namespace {
     176             : struct theDataRef : public rtl::Static< LiblantagDataRef, theDataRef > {};
     177             : }
     178             : 
     179           5 : LiblantagDataRef::LiblantagDataRef()
     180             :     :
     181           5 :         mnRef(0)
     182             : {
     183           5 : }
     184             : 
     185          10 : LiblantagDataRef::~LiblantagDataRef()
     186             : {
     187             :     // When destructed we're tearing down unconditionally.
     188           5 :     if (mnRef)
     189           5 :         mnRef = 1;
     190           5 :     decRef();
     191           5 : }
     192             : 
     193           5 : void LiblantagDataRef::setup()
     194             : {
     195             :     SAL_INFO( "i18nlangtag", "LiblantagDataRef::setup: initializing database");
     196           5 :     if (maDataPath.isEmpty())
     197           5 :         setupDataPath();
     198           5 :     lt_db_initialize();
     199             :     // Hold ref eternally.
     200           5 :     mnRef = SAL_MAX_UINT32;
     201           5 : }
     202             : 
     203           5 : void LiblantagDataRef::teardown()
     204             : {
     205             :     SAL_INFO( "i18nlangtag", "LiblantagDataRef::teardown: finalizing database");
     206           5 :     lt_db_finalize();
     207           5 : }
     208             : 
     209           5 : void LiblantagDataRef::setupDataPath()
     210             : {
     211             :     // maDataPath is assumed to be empty here.
     212           5 :     OUString aURL("$BRAND_BASE_DIR/" LIBO_SHARE_FOLDER "/liblangtag");
     213           5 :     rtl::Bootstrap::expandMacros(aURL); //TODO: detect failure
     214             : 
     215             :     // Check if data is in our own installation, else assume system
     216             :     // installation.
     217          10 :     OUString aData( aURL);
     218           5 :     aData += "/language-subtag-registry.xml";
     219          10 :     osl::DirectoryItem aDirItem;
     220           5 :     if (osl::DirectoryItem::get( aData, aDirItem) == osl::DirectoryItem::E_None)
     221             :     {
     222           0 :         OUString aPath;
     223           0 :         if (osl::FileBase::getSystemPathFromFileURL( aURL, aPath) == osl::FileBase::E_None)
     224           0 :             maDataPath = OUStringToOString( aPath, RTL_TEXTENCODING_UTF8);
     225             :     }
     226           5 :     if (maDataPath.isEmpty())
     227           5 :         maDataPath = "|";   // assume system
     228             :     else
     229           5 :         lt_db_set_datadir( maDataPath.getStr());
     230           5 : }
     231             : 
     232             : 
     233             : /* TODO: we could transform known vendor and browser-specific variants to known
     234             :  * BCP 47 if available. For now just remove them to not confuse any later
     235             :  * treatments that check for empty variants. This vendor stuff was never
     236             :  * supported anyway. */
     237      570626 : static void handleVendorVariant( com::sun::star::lang::Locale & rLocale )
     238             : {
     239      570626 :     if (!rLocale.Variant.isEmpty() && rLocale.Language != I18NLANGTAG_QLT)
     240           7 :         rLocale.Variant = OUString();
     241      570626 : }
     242             : 
     243             : 
     244             : class LanguageTagImpl
     245             : {
     246             : public:
     247             : 
     248             :     explicit LanguageTagImpl( const LanguageTag & rLanguageTag );
     249             :     explicit LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl );
     250             :     ~LanguageTagImpl();
     251             :     LanguageTagImpl& operator=( const LanguageTagImpl & rLanguageTagImpl );
     252             : 
     253             : private:
     254             : 
     255             :     friend class LanguageTag;
     256             : 
     257             :     enum Decision
     258             :     {
     259             :         DECISION_DONTKNOW,
     260             :         DECISION_NO,
     261             :         DECISION_YES
     262             :     };
     263             : 
     264             :     mutable com::sun::star::lang::Locale    maLocale;
     265             :     mutable OUString                        maBcp47;
     266             :     mutable OUString                        maCachedLanguage;   ///< cache getLanguage()
     267             :     mutable OUString                        maCachedScript;     ///< cache getScript()
     268             :     mutable OUString                        maCachedCountry;    ///< cache getCountry()
     269             :     mutable OUString                        maCachedVariants;   ///< cache getVariants()
     270             :     mutable lt_tag_t*                       mpImplLangtag;      ///< liblangtag pointer
     271             :     mutable LanguageType                    mnLangID;
     272             :     mutable Decision                        meIsValid;
     273             :     mutable Decision                        meIsIsoLocale;
     274             :     mutable Decision                        meIsIsoODF;
     275             :     mutable Decision                        meIsLiblangtagNeeded;   ///< whether processing with liblangtag needed
     276             :             bool                            mbSystemLocale      : 1;
     277             :     mutable bool                            mbInitializedBcp47  : 1;
     278             :     mutable bool                            mbInitializedLocale : 1;
     279             :     mutable bool                            mbInitializedLangID : 1;
     280             :     mutable bool                            mbCachedLanguage    : 1;
     281             :     mutable bool                            mbCachedScript      : 1;
     282             :     mutable bool                            mbCachedCountry     : 1;
     283             :     mutable bool                            mbCachedVariants    : 1;
     284             : 
     285             :     const OUString &    getBcp47() const;
     286             :     OUString            getLanguage() const;
     287             :     OUString            getScript() const;
     288             :     OUString            getCountry() const;
     289             :     OUString            getRegion() const;
     290             :     OUString            getVariants() const;
     291             :     bool                hasScript() const;
     292             : 
     293             :     bool                isIsoLocale() const;
     294             :     bool                isIsoODF() const;
     295             :     bool                isValidBcp47() const;
     296             : 
     297             :     void                convertLocaleToBcp47();
     298             :     void                convertLocaleToLang( bool bAllowOnTheFlyID );
     299             :     void                convertBcp47ToLocale();
     300             :     void                convertBcp47ToLang();
     301             :     void                convertLangToLocale();
     302             :     void                convertLangToBcp47();
     303             : 
     304             :     /** @return whether BCP 47 language tag string was changed. */
     305             :     bool                canonicalize();
     306             : 
     307             :     /** Canonicalize if not yet done and synchronize initialized conversions.
     308             : 
     309             :         @return whether BCP 47 language tag string was changed.
     310             :      */
     311             :     bool                synCanonicalize();
     312             : 
     313             :     OUString            getLanguageFromLangtag();
     314             :     OUString            getScriptFromLangtag();
     315             :     OUString            getRegionFromLangtag();
     316             :     OUString            getVariantsFromLangtag();
     317             : 
     318             :     /** Generates on-the-fly LangID and registers the maBcp47,mnLangID pair.
     319             : 
     320             :         @param  nRegisterID
     321             :                 If not 0 and not LANGUAGE_DONTKNOW, suggest (!) to use that ID
     322             :                 instead of generating an on-the-fly ID. Implementation may
     323             :                 still generate an ID if the suggested ID is already used for
     324             :                 another language tag.
     325             : 
     326             :         @return NULL if no ID could be obtained or registration failed.
     327             :      */
     328             :     LanguageTag::ImplPtr registerOnTheFly( LanguageType nRegisterID );
     329             : 
     330             :     /** Obtain Language, Script, Country and Variants via simpleExtract() and
     331             :         assign them to the cached variables if successful.
     332             : 
     333             :         @return return of simpleExtract()
     334             :      */
     335             :     bool                cacheSimpleLSCV();
     336             : 
     337             :     enum Extraction
     338             :     {
     339             :         EXTRACTED_NONE,
     340             :         EXTRACTED_LSC,
     341             :         EXTRACTED_LV,
     342             :         EXTRACTED_X,
     343             :         EXTRACTED_X_JOKER
     344             :     };
     345             : 
     346             :     /** Of a language tag of the form lll[-Ssss][-CC][-vvvvvvvv] extract the
     347             :         portions.
     348             : 
     349             :         Does not check case or content!
     350             : 
     351             :         @return EXTRACTED_LSC if simple tag was detected (i.e. one that
     352             :                 would fulfill the isIsoODF() condition),
     353             :                 EXTRACTED_LV if a tag with variant was detected,
     354             :                 EXTRACTED_X if x-... privateuse tag was detected,
     355             :                 EXTRACTED_X_JOKER if "*" joker was detected,
     356             :                 EXTRACTED_NONE else.
     357             :      */
     358             :     static Extraction   simpleExtract( const OUString& rBcp47,
     359             :                                        OUString& rLanguage,
     360             :                                        OUString& rScript,
     361             :                                        OUString& rCountry,
     362             :                                        OUString& rVariants );
     363             : 
     364             :     /** Convert Locale to BCP 47 string without resolving system and creating
     365             :         temporary LanguageTag instances. */
     366             :     static OUString     convertToBcp47( const com::sun::star::lang::Locale& rLocale );
     367             : };
     368             : 
     369             : 
     370      242762 : LanguageTagImpl::LanguageTagImpl( const LanguageTag & rLanguageTag )
     371             :     :
     372             :         maLocale( rLanguageTag.maLocale),
     373             :         maBcp47( rLanguageTag.maBcp47),
     374             :         mpImplLangtag( NULL),
     375             :         mnLangID( rLanguageTag.mnLangID),
     376             :         meIsValid( DECISION_DONTKNOW),
     377             :         meIsIsoLocale( DECISION_DONTKNOW),
     378             :         meIsIsoODF( DECISION_DONTKNOW),
     379             :         meIsLiblangtagNeeded( DECISION_DONTKNOW),
     380             :         mbSystemLocale( rLanguageTag.mbSystemLocale),
     381             :         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
     382             :         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
     383             :         mbInitializedLangID( rLanguageTag.mbInitializedLangID),
     384             :         mbCachedLanguage( false),
     385             :         mbCachedScript( false),
     386             :         mbCachedCountry( false),
     387      242762 :         mbCachedVariants( false)
     388             : {
     389      242762 : }
     390             : 
     391             : 
     392           0 : LanguageTagImpl::LanguageTagImpl( const LanguageTagImpl & rLanguageTagImpl )
     393             :     :
     394             :         maLocale( rLanguageTagImpl.maLocale),
     395             :         maBcp47( rLanguageTagImpl.maBcp47),
     396             :         maCachedLanguage( rLanguageTagImpl.maCachedLanguage),
     397             :         maCachedScript( rLanguageTagImpl.maCachedScript),
     398             :         maCachedCountry( rLanguageTagImpl.maCachedCountry),
     399             :         maCachedVariants( rLanguageTagImpl.maCachedVariants),
     400             :         mpImplLangtag( rLanguageTagImpl.mpImplLangtag ?
     401           0 :                 lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL),
     402             :         mnLangID( rLanguageTagImpl.mnLangID),
     403             :         meIsValid( rLanguageTagImpl.meIsValid),
     404             :         meIsIsoLocale( rLanguageTagImpl.meIsIsoLocale),
     405             :         meIsIsoODF( rLanguageTagImpl.meIsIsoODF),
     406             :         meIsLiblangtagNeeded( rLanguageTagImpl.meIsLiblangtagNeeded),
     407             :         mbSystemLocale( rLanguageTagImpl.mbSystemLocale),
     408             :         mbInitializedBcp47( rLanguageTagImpl.mbInitializedBcp47),
     409             :         mbInitializedLocale( rLanguageTagImpl.mbInitializedLocale),
     410             :         mbInitializedLangID( rLanguageTagImpl.mbInitializedLangID),
     411             :         mbCachedLanguage( rLanguageTagImpl.mbCachedLanguage),
     412             :         mbCachedScript( rLanguageTagImpl.mbCachedScript),
     413             :         mbCachedCountry( rLanguageTagImpl.mbCachedCountry),
     414           0 :         mbCachedVariants( rLanguageTagImpl.mbCachedVariants)
     415             : {
     416           0 :     if (mpImplLangtag)
     417           0 :         theDataRef::get().incRef();
     418           0 : }
     419             : 
     420             : 
     421           0 : LanguageTagImpl& LanguageTagImpl::operator=( const LanguageTagImpl & rLanguageTagImpl )
     422             : {
     423           0 :     if (&rLanguageTagImpl == this)
     424           0 :         return *this;
     425             : 
     426           0 :     maLocale            = rLanguageTagImpl.maLocale;
     427           0 :     maBcp47             = rLanguageTagImpl.maBcp47;
     428           0 :     maCachedLanguage    = rLanguageTagImpl.maCachedLanguage;
     429           0 :     maCachedScript      = rLanguageTagImpl.maCachedScript;
     430           0 :     maCachedCountry     = rLanguageTagImpl.maCachedCountry;
     431           0 :     maCachedVariants    = rLanguageTagImpl.maCachedVariants;
     432             :     mpImplLangtag       = rLanguageTagImpl.mpImplLangtag ?
     433           0 :                             lt_tag_copy( rLanguageTagImpl.mpImplLangtag) : NULL;
     434           0 :     mnLangID            = rLanguageTagImpl.mnLangID;
     435           0 :     meIsValid           = rLanguageTagImpl.meIsValid;
     436           0 :     meIsIsoLocale       = rLanguageTagImpl.meIsIsoLocale;
     437           0 :     meIsIsoODF          = rLanguageTagImpl.meIsIsoODF;
     438           0 :     meIsLiblangtagNeeded= rLanguageTagImpl.meIsLiblangtagNeeded;
     439           0 :     mbSystemLocale      = rLanguageTagImpl.mbSystemLocale;
     440           0 :     mbInitializedBcp47  = rLanguageTagImpl.mbInitializedBcp47;
     441           0 :     mbInitializedLocale = rLanguageTagImpl.mbInitializedLocale;
     442           0 :     mbInitializedLangID = rLanguageTagImpl.mbInitializedLangID;
     443           0 :     mbCachedLanguage    = rLanguageTagImpl.mbCachedLanguage;
     444           0 :     mbCachedScript      = rLanguageTagImpl.mbCachedScript;
     445           0 :     mbCachedCountry     = rLanguageTagImpl.mbCachedCountry;
     446           0 :     mbCachedVariants    = rLanguageTagImpl.mbCachedVariants;
     447           0 :     if (mpImplLangtag)
     448           0 :         theDataRef::get().incRef();
     449           0 :     return *this;
     450             : }
     451             : 
     452             : 
     453      485152 : LanguageTagImpl::~LanguageTagImpl()
     454             : {
     455      242576 :     if (mpImplLangtag)
     456             :     {
     457          11 :         lt_tag_unref( mpImplLangtag);
     458          11 :         theDataRef::get().decRef();
     459             :     }
     460      242576 : }
     461             : 
     462             : 
     463      328402 : LanguageTag::LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize )
     464             :     :
     465             :         maBcp47( rBcp47LanguageTag),
     466             :         mnLangID( LANGUAGE_DONTKNOW),
     467      328402 :         mbSystemLocale( rBcp47LanguageTag.isEmpty()),
     468      328402 :         mbInitializedBcp47( !mbSystemLocale),
     469             :         mbInitializedLocale( false),
     470             :         mbInitializedLangID( false),
     471      985206 :         mbIsFallback( false)
     472             : {
     473      328402 :     if (bCanonicalize)
     474             :     {
     475        6312 :         getImpl()->canonicalize();
     476             :         // Registration itself may already have canonicalized, so do an
     477             :         // unconditional sync.
     478        6312 :         syncFromImpl();
     479             :     }
     480             : 
     481      328402 : }
     482             : 
     483             : 
     484      570516 : LanguageTag::LanguageTag( const com::sun::star::lang::Locale & rLocale )
     485             :     :
     486             :         maLocale( rLocale),
     487             :         mnLangID( LANGUAGE_DONTKNOW),
     488      570516 :         mbSystemLocale( rLocale.Language.isEmpty()),
     489             :         mbInitializedBcp47( false),
     490      570516 :         mbInitializedLocale( !mbSystemLocale),
     491             :         mbInitializedLangID( false),
     492     1711548 :         mbIsFallback( false)
     493             : {
     494      570516 :     handleVendorVariant( maLocale);
     495      570516 : }
     496             : 
     497             : 
     498      716436 : LanguageTag::LanguageTag( LanguageType nLanguage )
     499             :     :
     500             :         mnLangID( nLanguage),
     501      716436 :         mbSystemLocale( nLanguage == LANGUAGE_SYSTEM),
     502             :         mbInitializedBcp47( false),
     503             :         mbInitializedLocale( false),
     504      716436 :         mbInitializedLangID( !mbSystemLocale),
     505     2149308 :         mbIsFallback( false)
     506             : {
     507      716436 : }
     508             : 
     509             : 
     510         554 : LanguageTag::LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
     511             :                           const OUString& rScript, const OUString& rCountry )
     512             :     :
     513             :         maBcp47( rBcp47),
     514             :         mnLangID( LANGUAGE_DONTKNOW),
     515         554 :         mbSystemLocale( rBcp47.isEmpty() && rLanguage.isEmpty()),
     516         554 :         mbInitializedBcp47( !rBcp47.isEmpty()),
     517             :         mbInitializedLocale( false),
     518             :         mbInitializedLangID( false),
     519        1662 :         mbIsFallback( false)
     520             : {
     521         554 :     if (!mbSystemLocale && !mbInitializedBcp47)
     522             :     {
     523         554 :         if (rScript.isEmpty())
     524             :         {
     525         554 :             maBcp47 = rLanguage + "-" + rCountry;
     526         554 :             mbInitializedBcp47 = true;
     527         554 :             maLocale.Language = rLanguage;
     528         554 :             maLocale.Country  = rCountry;
     529         554 :             mbInitializedLocale = true;
     530             :         }
     531             :         else
     532             :         {
     533           0 :             if (rCountry.isEmpty())
     534           0 :                 maBcp47 = rLanguage + "-" + rScript;
     535             :             else
     536           0 :                 maBcp47 = rLanguage + "-" + rScript + "-" + rCountry;
     537           0 :             mbInitializedBcp47 = true;
     538           0 :             maLocale.Language = I18NLANGTAG_QLT;
     539           0 :             maLocale.Country  = rCountry;
     540           0 :             maLocale.Variant  = maBcp47;
     541           0 :             mbInitializedLocale = true;
     542             :         }
     543             :     }
     544         554 : }
     545             : 
     546             : // Construct from an rtl_Locale: an empty Language marks the system locale, otherwise the locale counts as initialized.
     547         137 : LanguageTag::LanguageTag( const rtl_Locale & rLocale )
     548             :     :
     549             :         maLocale( rLocale.Language, rLocale.Country, rLocale.Variant),
     550             :         mnLangID( LANGUAGE_DONTKNOW),
     551         137 :         mbSystemLocale( maLocale.Language.isEmpty()),   // reads the maLocale initialized just above
     552             :         mbInitializedBcp47( false),
     553         137 :         mbInitializedLocale( !mbSystemLocale),   // NOTE(review): relies on mbSystemLocale being declared before this member - confirm in the header
     554             :         mbInitializedLangID( false),
     555         411 :         mbIsFallback( false)
     556             : {
     557         137 :     convertFromRtlLocale();   // defined outside this view
     558         137 : }
     559             : 
     560             : // Copy constructor: copies every member of rLanguageTag, including the mpImpl pointer and mbIsFallback.
     561    10432323 : LanguageTag::LanguageTag( const LanguageTag & rLanguageTag )
     562             :     :
     563             :         maLocale( rLanguageTag.maLocale),
     564             :         maBcp47( rLanguageTag.maBcp47),
     565             :         mnLangID( rLanguageTag.mnLangID),
     566             :         mpImpl( rLanguageTag.mpImpl),
     567             :         mbSystemLocale( rLanguageTag.mbSystemLocale),
     568             :         mbInitializedBcp47( rLanguageTag.mbInitializedBcp47),
     569             :         mbInitializedLocale( rLanguageTag.mbInitializedLocale),
     570             :         mbInitializedLangID( rLanguageTag.mbInitializedLangID),
     571    10432323 :         mbIsFallback(rLanguageTag.mbIsFallback)
     572             : {
     573    10432323 : }
     574             : 
     575             : // Copy assignment: takes over every member of rLanguageTag, including mbIsFallback, and returns *this.
     576      109105 : LanguageTag& LanguageTag::operator=( const LanguageTag & rLanguageTag )
     577             : {
     578      109105 :     if (&rLanguageTag == this)
     579           0 :         return *this;   // self-assignment, nothing to copy
     580             : 
     581      109105 :     maLocale            = rLanguageTag.maLocale;
     582      109105 :     maBcp47             = rLanguageTag.maBcp47;
     583      109105 :     mnLangID            = rLanguageTag.mnLangID;
     584      109105 :     mpImpl              = rLanguageTag.mpImpl;
     585      109105 :     mbSystemLocale      = rLanguageTag.mbSystemLocale;
     586      109105 :     mbInitializedBcp47  = rLanguageTag.mbInitializedBcp47;
     587      109105 :     mbInitializedLocale = rLanguageTag.mbInitializedLocale;
     588      109105 :     mbInitializedLangID = rLanguageTag.mbInitializedLangID;
                     :     // Copy the fallback flag as well so a stale value of the previous tag does not survive the assignment.
                     :     mbIsFallback        = rLanguageTag.mbIsFallback;
     589      109105 :     return *this;
     590             : }
     591             : 
     592             : // Destructor: empty body, no explicit cleanup is performed here.
     593    12023405 : LanguageTag::~LanguageTag()
     594             : {
     595    12023405 : }
     596             : 
     597             : // Register this impl's BCP 47 tag with nRegisterID, or with a generated on-the-fly ID; returns the impl found or created for the tag, or an empty pointer if there is no BCP 47 string.
     598        3530 : LanguageTag::ImplPtr LanguageTagImpl::registerOnTheFly( LanguageType nRegisterID )
     599             : {
     600        3530 :     LanguageTag::ImplPtr pImpl;
     601             : 
     602        3530 :     if (!mbInitializedBcp47)
     603             :     {
     604           0 :         if (mbInitializedLocale)
     605             :         {
     606           0 :             maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);   // derive the tag from the locale
     607           0 :             mbInitializedBcp47 = !maBcp47.isEmpty();
     608             :         }
     609             :     }
     610        3530 :     if (maBcp47.isEmpty())
     611             :     {
     612             :         SAL_WARN( "i18nlangtag", "LanguageTagImpl::registerOnTheFly: no Bcp47 string, no registering");
     613           0 :         return pImpl;   // still empty at this point
     614             :     }
     615             : 
     616        7060 :     osl::MutexGuard aGuard( theMutex::get());   // the map accesses below happen under theMutex
     617             : 
     618        3530 :     MapBcp47& rMapBcp47 = theMapBcp47::get();
     619        3530 :     MapBcp47::const_iterator it( rMapBcp47.find( maBcp47));
     620        3530 :     bool bOtherImpl = false;   // set when the map already holds a different impl for this tag
     621        3530 :     if (it != rMapBcp47.end())
     622             :     {
     623             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: found impl for '" << maBcp47 << "'");
     624        3530 :         pImpl = (*it).second;
     625        3530 :         if (pImpl.get() != this)
     626             :         {
     627             :             // Could happen for example if during registerImpl() the tag was
     628             :             // changed via canonicalize() and the result was already present in
     629             :             // the map before, for example 'bn-Beng' => 'bn'. This specific
     630             :             // case is now taken care of in registerImpl() and doesn't reach
     631             :             // here. However, use the already existing impl if it matches.
     632             :             SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: using other impl for this '" << maBcp47 << "'");
     633           0 :             *this = *pImpl;     // ensure consistency
     634           0 :             bOtherImpl = true;
     635             :         }
     636             :     }
     637             :     else
     638             :     {
     639             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: new impl for '" << maBcp47 << "'");
     640           0 :         pImpl.reset( new LanguageTagImpl( *this));   // the map stores a copy of this impl, not this object itself
     641           0 :         rMapBcp47.insert( ::std::make_pair( maBcp47, pImpl));
     642             :     }
     643             : 
     644        3530 :     if (!bOtherImpl || !pImpl->mbInitializedLangID)   // skip ID assignment only if another impl already carries a LangID
     645             :     {
     646        3530 :         if (nRegisterID == 0 || nRegisterID == LANGUAGE_DONTKNOW)
     647          93 :             nRegisterID = getNextOnTheFlyLanguage();   // no usable suggestion, generate an ID
     648             :         else
     649             :         {
     650             :             // Accept a suggested ID only if it is not mapped yet to something
     651             :             // different, otherwise we would end up with ambiguous assignments
     652             :             // of different language tags, for example for the same primary
     653             :             // LangID with "no", "nb" and "nn".
     654        3437 :             const MapLangID& rMapLangID = theMapLangID::get();
     655        3437 :             MapLangID::const_iterator itID( rMapLangID.find( nRegisterID));
     656        3437 :             if (itID != rMapLangID.end())
     657             :             {
     658           1 :                 if ((*itID).second->maBcp47 != maBcp47)
     659             :                 {
     660             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: not using suggested 0x"
     661             :                             << ::std::hex << nRegisterID << " for '" << maBcp47 << "' have '"
     662             :                             << (*itID).second->maBcp47 << "'");
     663           1 :                     nRegisterID = getNextOnTheFlyLanguage();
     664             :                 }
     665             :                 else
     666             :                 {
     667             :                     SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: suggested 0x"
     668             :                             << ::std::hex << nRegisterID << " for '" << maBcp47 << "' already registered");
     669             :                 }
     670             :             }
     671             :         }
     672        3530 :         if (!nRegisterID)
     673             :         {
     674             :             // out of IDs, nothing to register
     675           0 :             return pImpl;
     676             :         }
     677        3530 :         pImpl->mnLangID = nRegisterID;
     678        3530 :         pImpl->mbInitializedLangID = true;
     679        3530 :         if (pImpl.get() != this)   // keep this object in step with the impl stored in the map
     680             :         {
     681           0 :             mnLangID = nRegisterID;
     682           0 :             mbInitializedLangID = true;
     683             :         }
     684             :     }
     685             : 
     686             :     ::std::pair< MapLangID::const_iterator, bool > res(
     687        3530 :             theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));   // cross-insert into the LangID map
     688        3530 :     if (res.second)
     689             :     {
     690             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerOnTheFly: cross-inserted 0x"
     691             :                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
     692             :     }
     693             :     else
     694             :     {
     695             :         SAL_WARN( "i18nlangtag", "LanguageTag::registerOnTheFly: not cross-inserted 0x"
     696             :                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
     697             :                 << (*res.first).second->maBcp47 << "'");
     698             :     }
     699             : 
     700        3530 :     return pImpl;
     701             : }
     702             : // Set the configured system language and re-register the system locale impl; LANGUAGE_DONTKNOW and LANGUAGE_SYSTEM are refused with a warning.
     703             : // static
     704         153 : void LanguageTag::setConfiguredSystemLanguage( LanguageType nLang )
     705             : {
     706         153 :     if (nLang == LANGUAGE_DONTKNOW || nLang == LANGUAGE_SYSTEM)
     707             :     {
     708             :         SAL_WARN( "i18nlangtag",
     709             :                 "LanguageTag::setConfiguredSystemLanguage: refusing to set unresolved system locale 0x" <<
     710             :                 ::std::hex << nLang);
     711           0 :         return;
     712             :     }
     713             :     SAL_INFO( "i18nlangtag", "LanguageTag::setConfiguredSystemLanguage: setting to 0x" << ::std::hex << nLang);
     714         153 :     MsLangId::LanguageTagAccess::setConfiguredSystemLanguage( nLang);
     715             :     // Reset system locale to none and let registerImpl() do the rest to
     716             :     // initialize a new one.
     717         153 :     theSystemLocale::get().reset();
     718         153 :     LanguageTag aLanguageTag( LANGUAGE_SYSTEM);   // temporary tag, used only to trigger the registration
     719         153 :     aLanguageTag.registerImpl();
     720             : }
     721             : // True if nLang is neither LANGUAGE_DONTKNOW nor LANGUAGE_SYSTEM and is either an on-the-fly ID or equal to its own primary language ID.
     722      222595 : static bool lcl_isKnownOnTheFlyID( LanguageType nLang )
     723             : {
     724      450866 :     return nLang != LANGUAGE_DONTKNOW && nLang != LANGUAGE_SYSTEM &&
     725      667687 :         (LanguageTag::isOnTheFlyID( nLang) || (nLang == MsLangId::getPrimaryLanguage( nLang)));
     726             : }
     727             : 
     728             : // Find or create the LanguageTagImpl for this tag: shortcuts for system, LANGUAGE_DONTKNOW and system-equal tags first, then lookup or insertion in the LangID / Bcp47 maps under theMutex.
     729     2460385 : LanguageTag::ImplPtr LanguageTag::registerImpl() const
     730             : {
     731             :     // XXX NOTE: Do not use non-static LanguageTag::convert...() member methods
     732             :     // here as they access getImpl() and syncFromImpl() and would lead to
     733             :     // recursion. Also do not use the static LanguageTag::convertTo...()
     734             :     // methods as they may create temporary LanguageTag instances. Only
     735             :     // LanguageTagImpl::convertToBcp47(Locale) is ok.
     736             : 
     737     2460385 :     ImplPtr pImpl;
     738             : 
     739             : #if OSL_DEBUG_LEVEL > 0
     740             :     static size_t nCalls = 0;
     741             :     ++nCalls;
     742             :     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCalls << " calls");
     743             : #endif
     744             : 
     745             :     // Do not register unresolved system locale, also force LangID if system
     746             :     // and take the system locale shortcut if possible.
     747     2460385 :     if (mbSystemLocale)
     748             :     {
     749      201999 :         pImpl = theSystemLocale::get();
     750      201999 :         if (pImpl)
     751             :         {
     752             : #if OSL_DEBUG_LEVEL > 0
     753             :             static size_t nCallsSystem = 0;
     754             :             ++nCallsSystem;
     755             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystem << " system calls");
     756             : #endif
     757      201802 :             return pImpl;
     758             :         }
     759         197 :         if (!mbInitializedLangID)
     760             :         {
     761         197 :             mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);   // NOTE(review): written in a const method, so presumably a mutable member - confirm in the header
     762         197 :             mbInitializedLangID = (mnLangID != LANGUAGE_SYSTEM);
     763             :             SAL_WARN_IF( !mbInitializedLangID, "i18nlangtag", "LanguageTag::registerImpl: can't resolve system!");
     764             :         }
     765             :     }
     766             : 
     767     2258583 :     if (mbInitializedLangID)
     768             :     {
     769     1402007 :         if (mnLangID == LANGUAGE_DONTKNOW)
     770             :         {
     771             :             // Heavy usage of LANGUAGE_DONTKNOW, make it an own Impl for all the
     772             :             // conversion attempts. At the same time provide a central breakpoint
     773             :             // to inspect such places.
     774      623870 :             LanguageTag::ImplPtr& rDontKnow = theDontKnow::get();
     775      623870 :             if (!rDontKnow)
     776         131 :                 rDontKnow.reset( new LanguageTagImpl( *this));   // NOTE(review): runs before theMutex is taken below - confirm thread-safety expectations
     777      623870 :             pImpl = rDontKnow;
     778             : #if OSL_DEBUG_LEVEL > 0
     779             :             static size_t nCallsDontKnow = 0;
     780             :             ++nCallsDontKnow;
     781             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsDontKnow << " DontKnow calls");
     782             : #endif
     783      623870 :             return pImpl;
     784             :         }
     785             :         else
     786             :         {
     787             :             // A great share are calls for a system equal locale.
     788      778137 :             pImpl = theSystemLocale::get();
     789      778137 :             if (pImpl && pImpl->mnLangID == mnLangID)
     790             :             {
     791             : #if OSL_DEBUG_LEVEL > 0
     792             :                 static size_t nCallsSystemEqual = 0;
     793             :                 ++nCallsSystemEqual;
     794             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual
     795             :                         << " system equal LangID calls");
     796             : #endif
     797      458831 :                 return pImpl;
     798             :             }
     799             :         }
     800             :     }
     801             : 
     802             :     // Force Bcp47 if not LangID.
     803     1175882 :     if (!mbInitializedLangID && !mbInitializedBcp47 && mbInitializedLocale)
     804             :     {
     805      530888 :         maBcp47 = LanguageTagImpl::convertToBcp47( maLocale);
     806      530888 :         mbInitializedBcp47 = !maBcp47.isEmpty();
     807             :     }
     808             : 
     809     1175882 :     if (mbInitializedBcp47)
     810             :     {
     811             :         // A great share are calls for a system equal locale.
     812      856651 :         pImpl = theSystemLocale::get();
     813      856651 :         if (pImpl && pImpl->maBcp47 == maBcp47)
     814             :         {
     815             : #if OSL_DEBUG_LEVEL > 0
     816             :             static size_t nCallsSystemEqual = 0;
     817             :             ++nCallsSystemEqual;
     818             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsSystemEqual << " system equal BCP47 calls");
     819             : #endif
     820      314739 :             return pImpl;
     821             :         }
     822             :     }
     823             : 
     824             : #if OSL_DEBUG_LEVEL > 0
     825             :     static size_t nCallsNonSystem = 0;
     826             :     ++nCallsNonSystem;
     827             :     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << nCallsNonSystem << " non-system calls");
     828             : #endif
     829             : 
     830     1722286 :     osl::MutexGuard aGuard( theMutex::get());   // the map lookups and insertions below happen under theMutex
     831             : 
     832             : #if OSL_DEBUG_LEVEL > 0
     833             :     static long nRunning = 0;
     834             :     // Entering twice here is ok, which is needed for fallback init in
     835             :     // getKnowns() in canonicalize() via pImpl->convertBcp47ToLocale() below,
     836             :     // everything else is suspicious.
     837             :     SAL_WARN_IF( nRunning > 1, "i18nlangtag", "LanguageTag::registerImpl: re-entered for '"
     838             :             << maBcp47 << "' 0x" << ::std::hex << mnLangID );
     839             :     struct Runner { Runner() { ++nRunning; } ~Runner() { --nRunning; } } aRunner;
     840             : #endif
     841             : 
     842             :     // Prefer LangID map as find+insert needs less comparison work.
     843      861143 :     if (mbInitializedLangID)
     844             :     {
     845      319306 :         MapLangID& rMap = theMapLangID::get();
     846      319306 :         MapLangID::const_iterator it( rMap.find( mnLangID));
     847      319306 :         if (it != rMap.end())
     848             :         {
     849             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for 0x" << ::std::hex << mnLangID);
     850      316900 :             pImpl = (*it).second;
     851             :         }
     852             :         else
     853             :         {
     854             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for 0x" << ::std::hex << mnLangID);
     855        2406 :             pImpl.reset( new LanguageTagImpl( *this));
     856        2406 :             rMap.insert( ::std::make_pair( mnLangID, pImpl));
     857             :             // Try round-trip.
     858        2406 :             if (!pImpl->mbInitializedLocale)
     859        2401 :                 pImpl->convertLangToLocale();
     860        2406 :             LanguageType nLang = MsLangId::Conversion::convertLocaleToLanguage( pImpl->maLocale);
     861             :             // If round-trip is identical cross-insert to Bcp47 map.
     862        2406 :             if (nLang == pImpl->mnLangID)
     863             :             {
     864        2261 :                 if (!pImpl->mbInitializedBcp47)
     865        2256 :                     pImpl->convertLocaleToBcp47();
     866             :                 ::std::pair< MapBcp47::const_iterator, bool > res(
     867        2261 :                         theMapBcp47::get().insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
     868        2261 :                 if (res.second)
     869             :                 {
     870             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted '" << pImpl->maBcp47 << "'"
     871             :                             << " for 0x" << ::std::hex << mnLangID);
     872             :                 }
     873             :                 else
     874             :                 {
     875             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "'"
     876             :                             << " for 0x" << ::std::hex << mnLangID << " have 0x"
     877             :                             << ::std::hex << (*res.first).second->mnLangID);
     878             :                 }
     879             :             }
     880             :             else
     881             :             {
     882         145 :                 if (!pImpl->mbInitializedBcp47)
     883         145 :                     pImpl->convertLocaleToBcp47();   // computed although the impl is not cross-inserted; the value is used by the log below
     884             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted '" << pImpl->maBcp47 << "'"
     885             :                         << " for 0x" << ::std::hex << mnLangID << " round-trip to 0x" << ::std::hex << nLang);
     886             :             }
     887             :         }
     888             :     }
     889      541837 :     else if (!maBcp47.isEmpty())
     890             :     {
     891      541837 :         MapBcp47& rMap = theMapBcp47::get();
     892      541837 :         MapBcp47::const_iterator it( rMap.find( maBcp47));
     893      541837 :         if (it != rMap.end())
     894             :         {
     895             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: found impl for '" << maBcp47 << "'");
     896      301612 :             pImpl = (*it).second;
     897             :         }
     898             :         else
     899             :         {
     900             :             SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: new impl for '" << maBcp47 << "'");
     901      240225 :             pImpl.reset( new LanguageTagImpl( *this));
     902      240225 :             ::std::pair< MapBcp47::iterator, bool > insOrig( rMap.insert( ::std::make_pair( maBcp47, pImpl)));
     903             :             // If changed after canonicalize() also add the resulting tag to
     904             :             // the map.
     905      240225 :             if (pImpl->synCanonicalize())
     906             :             {
     907             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: canonicalized to '" << pImpl->maBcp47 << "'");
     908             :                 ::std::pair< MapBcp47::const_iterator, bool > insCanon(
     909       24424 :                         rMap.insert( ::std::make_pair( pImpl->maBcp47, pImpl)));
     910             :                 SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: " << (insCanon.second ? "" : "not ")
     911             :                         << "inserted '" << pImpl->maBcp47 << "'");
     912             :                 // If the canonicalized tag already existed (was not inserted)
     913             :                 // and impls are different, make this impl that impl and skip
     914             :                 // the rest if that LangID is present as well. The existing
     915             :                 // entry may or may not be different, it may even be strictly
     916             :                 // identical to this if it differs only in case (e.g. ko-kr =>
     917             :                 // ko-KR) which was corrected in canonicalize() hence also in
     918             :                 // the map entry but comparison is case insensitive and found
     919             :                 // it again.
     920       24424 :                 if (!insCanon.second && (*insCanon.first).second != pImpl)
     921             :                 {
     922       17630 :                     (*insOrig.first).second = pImpl = (*insCanon.first).second;   // the original key now maps to the already existing impl as well
     923             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: share impl with 0x"
     924             :                             << ::std::hex << pImpl->mnLangID);
     925             :                 }
     926             :             }
     927      240225 :             if (!pImpl->mbInitializedLangID)
     928             :             {
     929             :                 // Try round-trip Bcp47->Locale->LangID->Locale->Bcp47.
     930      222595 :                 if (!pImpl->mbInitializedLocale)
     931      222521 :                     pImpl->convertBcp47ToLocale();
     932      222595 :                 if (!pImpl->mbInitializedLangID)
     933      222595 :                     pImpl->convertLocaleToLang( true);
     934             :                 // Unconditionally insert (round-trip is possible) for
     935             :                 // on-the-fly IDs and (generated or not) suggested IDs.
     936      222595 :                 bool bInsert = lcl_isKnownOnTheFlyID( pImpl->mnLangID);
     937      222595 :                 OUString aBcp47;   // round-trip result; stays empty unless computed below, used for comparison and logging
     938      222595 :                 if (!bInsert)
     939             :                 {
     940      216917 :                     if (pImpl->mnLangID != LANGUAGE_DONTKNOW)
     941             :                     {
     942             :                         // May have involved canonicalize(), so compare with
     943             :                         // pImpl->maBcp47 instead of maBcp47!
     944      433830 :                         aBcp47 = LanguageTagImpl::convertToBcp47(
     945      433830 :                                 MsLangId::Conversion::convertLanguageToLocale( pImpl->mnLangID, true));
     946      216915 :                         bInsert = (aBcp47 == pImpl->maBcp47);
     947             :                     }
     948             :                 }
     949             :                 // If round-trip is identical cross-insert to LangID map.
     950      222595 :                 if (bInsert)
     951             :                 {
     952             :                     ::std::pair< MapLangID::const_iterator, bool > res(
     953      209702 :                             theMapLangID::get().insert( ::std::make_pair( pImpl->mnLangID, pImpl)));
     954      209702 :                     if (res.second)
     955             :                     {
     956             :                         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: cross-inserted 0x"
     957             :                                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "'");
     958             :                     }
     959             :                     else
     960             :                     {
     961             :                         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
     962             :                                 << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' have '"
     963             :                                 << (*res.first).second->maBcp47 << "'");
     964             :                     }
     965             :                 }
     966             :                 else
     967             :                 {
     968             :                     SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: not cross-inserted 0x"
     969             :                             << ::std::hex << pImpl->mnLangID << " for '" << maBcp47 << "' round-trip to '"
     970             :                             << aBcp47 << "'");
     971      222595 :                 }
     972             :             }
     973             :         }
     974             :     }
     975             :     else
     976             :     {
     977             :         SAL_WARN( "i18nlangtag", "LanguageTag::registerImpl: can't register for 0x" << ::std::hex << mnLangID );
     978           0 :         pImpl.reset( new LanguageTagImpl( *this));   // neither LangID nor BCP 47 string: the impl is created but put into no map
     979             :     }
     980             : 
     981             :     // If we reach here for mbSystemLocale we didn't have theSystemLocale
     982             :     // above, so add it.
     983      861143 :     if (mbSystemLocale && mbInitializedLangID)
     984             :     {
     985         197 :         theSystemLocale::get() = pImpl;
     986             :         SAL_INFO( "i18nlangtag", "LanguageTag::registerImpl: added system locale 0x"
     987             :                 << ::std::hex << pImpl->mnLangID << " '" << pImpl->maBcp47 << "'");
     988             :     }
     989             : 
     990      861143 :     return pImpl;
     991             : }
     992             : 
     993             : // Return mpImpl; on first use it is obtained from registerImpl() and syncVarsFromRawImpl() is called afterwards.
     994     3981056 : LanguageTag::ImplPtr LanguageTag::getImpl() const
     995             : {
     996     3981056 :     if (!mpImpl)
     997             :     {
     998     2460157 :         mpImpl = registerImpl();   // NOTE(review): assigned in a const method, so mpImpl is presumably mutable - confirm in the header
     999     2460157 :         syncVarsFromRawImpl();
    1000             :     }
    1001     3981056 :     return mpImpl;
    1002             : }
    1003             : 
    1004             : // Put all members back to the uninitialized system-locale state: no impl, empty locale and tag, LANGUAGE_SYSTEM, all "initialized" flags and the fallback flag cleared.
    1005     2814232 : void LanguageTag::resetVars()
    1006             : {
    1007     2814232 :     mpImpl.reset();
    1008     2814232 :     maLocale            = lang::Locale();
    1009     2814232 :     maBcp47             = OUString();
    1010     2814232 :     mnLangID            = LANGUAGE_SYSTEM;
    1011     2814232 :     mbSystemLocale      = true;
    1012     2814232 :     mbInitializedBcp47  = false;
    1013     2814232 :     mbInitializedLocale = false;
    1014     2814232 :     mbInitializedLangID = false;
    1015     2814232 :     mbIsFallback        = false;
    1016     2814232 : }
    1017             : 
    1018             : // Reset this tag to the given BCP 47 string (empty means system locale); with bCanonicalize the impl is canonicalized and synced back. Returns *this.
    1019         373 : LanguageTag & LanguageTag::reset( const OUString & rBcp47LanguageTag, bool bCanonicalize )
    1020             : {
    1021         373 :     resetVars();
    1022         373 :     maBcp47             = rBcp47LanguageTag;
    1023         373 :     mbSystemLocale      = rBcp47LanguageTag.isEmpty();
    1024         373 :     mbInitializedBcp47  = !mbSystemLocale;
    1025             : 
    1026         373 :     if (bCanonicalize)
    1027             :     {
    1028           0 :         getImpl()->canonicalize();
    1029             :         // Registration itself may already have canonicalized, so do an
    1030             :         // unconditional sync.
    1031           0 :         syncFromImpl();
    1032             :     }
    1033         373 :     return *this;
    1034             : }
    1035             : 
    1036             : // Reset this tag to the given Locale (empty Language means system locale), then pass maLocale to handleVendorVariant(). Returns *this.
    1037         110 : LanguageTag & LanguageTag::reset( const com::sun::star::lang::Locale & rLocale )
    1038             : {
    1039         110 :     resetVars();
    1040         110 :     maLocale            = rLocale;
    1041         110 :     mbSystemLocale      = rLocale.Language.isEmpty();
    1042         110 :     mbInitializedLocale = !mbSystemLocale;
    1043         110 :     handleVendorVariant( maLocale);   // defined outside this view
    1044         110 :     return *this;
    1045             : }
    1046             : 
    1047             : // Reset this tag to the given LangID; LANGUAGE_SYSTEM means system locale and leaves the LangID marked as not initialized. Returns *this.
    1048     2813749 : LanguageTag & LanguageTag::reset( LanguageType nLanguage )
    1049             : {
    1050     2813749 :     resetVars();
    1051     2813749 :     mnLangID            = nLanguage;
    1052     2813749 :     mbSystemLocale      = nLanguage == LANGUAGE_SYSTEM;
    1053     2813749 :     mbInitializedLangID = !mbSystemLocale;
    1054     2813749 :     return *this;
    1055             : }
    1056             : 
    1057             : 
    1058      248993 : bool LanguageTagImpl::canonicalize()
    1059             : {
                           // Canonicalizes maBcp47 and decides whether liblangtag is needed for
                           // this tag. Returns true if maBcp47 was modified. Besides maBcp47 this
                           // sets meIsLiblangtagNeeded and meIsValid, and resets meIsIsoLocale and
                           // meIsIsoODF to DECISION_DONTKNOW on some of the paths that change the
                           // string.
    1060             : #ifdef erDEBUG
    1061             :     // dump once
    1062             :     struct dumper
    1063             :     {
    1064             :         lt_tag_t** mpp;
    1065             :         dumper( lt_tag_t** pp ) : mpp( *pp ? NULL : pp) {}
    1066             :         ~dumper() { if (mpp && *mpp) lt_tag_dump( *mpp); }
    1067             :     };
    1068             :     dumper aDumper( &mpImplLangtag);
    1069             : #endif
    1070             : 
    1071      248993 :     bool bChanged = false;
    1072             : 
    1073             :     // Side effect: have maBcp47 in any case, resolved system.
    1074             :     // Some methods calling canonicalize() (or not calling it due to
    1075             :     // meIsLiblangtagNeeded==DECISION_NO) rely on this! Hence do not set
    1076             :     // meIsLiblangtagNeeded anywhere else than hereafter.
    1077      248993 :     getBcp47();
    1078             : 
    1079             :     // The simple cases and known locales don't need liblangtag processing,
    1080             :     // which also avoids loading liblangtag data on startup.
    1081      248993 :     if (meIsLiblangtagNeeded == DECISION_DONTKNOW)
    1082             :     {
                               // These flags record that maLocale / mnLangID were filled in below
                               // only for the known-locale check; both are reset again at the end
                               // of this block.
    1083      242681 :         bool bTemporaryLocale = false;
    1084      242681 :         bool bTemporaryLangID = false;
    1085      242681 :         if (!mbInitializedLocale && !mbInitializedLangID)
    1086             :         {
    1087      240151 :             if (mbSystemLocale)
    1088             :             {
    1089           0 :                 mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1090           0 :                 mbInitializedLangID = true;
    1091             :             }
    1092             :             else
    1093             :             {
    1094             :                 // Now this is getting funny.. we only have some BCP47 string
    1095             :                 // and want to determine if parsing it would be possible
    1096             :                 // without using liblangtag just to see if it is a simple known
    1097             :                 // locale or could fall back to one.
    1098      480302 :                 OUString aLanguage, aScript, aCountry, aVariants;
    1099      240151 :                 Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
    1100      240151 :                 if (eExt != EXTRACTED_NONE)
    1101             :                 {
    1102      240147 :                     if (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV)
    1103             :                     {
    1104             :                         // Rebuild bcp47 with proper casing of tags.
    1105      240139 :                         OUStringBuffer aBuf( aLanguage.getLength() + 1 + aScript.getLength() +
    1106      240139 :                                 1 + aCountry.getLength() + 1 + aVariants.getLength());
    1107      240139 :                         aBuf.append( aLanguage);
    1108      240139 :                         if (!aScript.isEmpty())
    1109       30829 :                             aBuf.append("-" + aScript);
    1110      240139 :                         if (!aCountry.isEmpty())
    1111      210860 :                             aBuf.append("-" + aCountry);
    1112      240139 :                         if (!aVariants.isEmpty())
    1113         904 :                             aBuf.append("-" + aVariants);
    1114      480278 :                         OUString aStr( aBuf.makeStringAndClear());
    1115             : 
    1116      240139 :                         if (maBcp47 != aStr)
    1117             :                         {
    1118         930 :                             maBcp47 = aStr;
    1119         930 :                             bChanged = true;
    1120      240139 :                         }
    1121             :                     }
                                               // Language[-Country] without script is stored as a plain
                                               // locale; everything else uses the I18NLANGTAG_QLT
                                               // language with the full BCP 47 string in Variant.
    1122      240147 :                     if (eExt == EXTRACTED_LSC && aScript.isEmpty())
    1123             :                     {
    1124      208406 :                         maLocale.Language = aLanguage;
    1125      208406 :                         maLocale.Country  = aCountry;
    1126             :                     }
    1127             :                     else
    1128             :                     {
    1129       31741 :                         maLocale.Language = I18NLANGTAG_QLT;
    1130       31741 :                         maLocale.Country  = aCountry;
    1131       31741 :                         maLocale.Variant  = maBcp47;
    1132             :                     }
    1133      240147 :                     bTemporaryLocale = mbInitializedLocale = true;
    1134      240151 :                 }
    1135             :             }
    1136             :         }
    1137      242681 :         if (mbInitializedLangID && !mbInitializedLocale)
    1138             :         {
    1139             :             // Do not call getLocale() here because that prefers
    1140             :             // convertBcp47ToLocale() which would end up in recursion via
    1141             :             // isIsoLocale()!
    1142             : 
    1143             :             // Prepare to verify that we have a known locale, not just an
    1144             :             // arbitrary MS-LangID.
    1145           0 :             convertLangToLocale();
    1146             :         }
    1147      242681 :         if (mbInitializedLocale)
    1148             :         {
    1149      242677 :             if (maLocale.Variant.isEmpty())
    1150      210587 :                 meIsLiblangtagNeeded = DECISION_NO;     // per definition ll[l][-CC]
    1151             :             else
    1152             :             {
    1153       32090 :                 if (!mbInitializedLangID)
    1154             :                 {
                                           // Passing false means no on-the-fly ID is generated for
                                           // an unknown locale (see convertLocaleToLang()).
    1155       31741 :                     convertLocaleToLang( false);
    1156       31741 :                     if (bTemporaryLocale || mnLangID == LANGUAGE_DONTKNOW)
    1157       31741 :                         bTemporaryLangID = true;
    1158             :                 }
    1159       32090 :                 if (mnLangID != LANGUAGE_DONTKNOW && mnLangID != LANGUAGE_SYSTEM)
    1160       32082 :                     meIsLiblangtagNeeded = DECISION_NO; // known locale
    1161             :                 else
    1162             :                 {
    1163           8 :                     const KnownTagSet& rKnowns = getKnowns();
    1164           8 :                     if (rKnowns.find( maBcp47) != rKnowns.end())
    1165           1 :                         meIsLiblangtagNeeded = DECISION_NO; // known fallback
    1166             :                 }
    1167             :             }
    1168             :             // We may have an internal override "canonicalization".
                                   // An override applies only if getOverride() returns a non-empty
                                   // Language that differs from maLocale in any of the three fields.
    1169      242677 :             lang::Locale aNew( MsLangId::Conversion::getOverride( maLocale));
    1170      505396 :             if (!aNew.Language.isEmpty() &&
    1171      455189 :                     (aNew.Language != maLocale.Language ||
    1172      431622 :                      aNew.Country  != maLocale.Country ||
    1173      215585 :                      aNew.Variant  != maLocale.Variant))
    1174             :             {
    1175       23567 :                 maBcp47 = LanguageTagImpl::convertToBcp47( aNew);
    1176       23567 :                 bChanged = true;
    1177       23567 :                 meIsIsoLocale = DECISION_DONTKNOW;
    1178       23567 :                 meIsIsoODF = DECISION_DONTKNOW;
    1179       23567 :                 meIsLiblangtagNeeded = DECISION_NO; // known locale
    1180      242677 :             }
    1181             :         }
                               // Undo the temporary locale / LangID so that they are not seen as
                               // initialized by later conversions.
    1182      242681 :         if (bTemporaryLocale)
    1183             :         {
    1184      240147 :             mbInitializedLocale = false;
    1185      240147 :             maLocale = lang::Locale();
    1186             :         }
    1187      242681 :         if (bTemporaryLangID)
    1188             :         {
    1189       31741 :             mbInitializedLangID = false;
    1190       31741 :             mnLangID = LANGUAGE_DONTKNOW;
    1191             :         }
    1192             :     }
    1193      248993 :     if (meIsLiblangtagNeeded == DECISION_NO)
    1194             :     {
    1195      248980 :         meIsValid = DECISION_YES;   // really, known must be valid ...
    1196      248980 :         return bChanged;            // that's it
    1197             :     }
    1198             : 
                           // From here on liblangtag does the parsing and canonicalization.
    1199          13 :     meIsLiblangtagNeeded = DECISION_YES;
    1200             :     SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: using liblangtag for '" << maBcp47 << "'");
    1201             : 
                           // Create the lt_tag_t on first use only.
                           // NOTE(review): theDataRef is defined outside this chunk; presumably
                           // incRef() keeps the shared liblangtag data referenced - confirm.
    1202          13 :     if (!mpImplLangtag)
    1203             :     {
    1204          11 :         theDataRef::get().incRef();
    1205          11 :         mpImplLangtag = lt_tag_new();
    1206             :     }
    1207             : 
    1208          13 :     myLtError aError;
    1209             : 
    1210          13 :     if (lt_tag_parse( mpImplLangtag, OUStringToOString( maBcp47, RTL_TEXTENCODING_UTF8).getStr(), &aError.p))
    1211             :     {
                               // pTag is released with free() on every path below that got a
                               // non-NULL pointer.
    1212           9 :         char* pTag = lt_tag_canonicalize( mpImplLangtag, &aError.p);
    1213             :         SAL_WARN_IF( !pTag, "i18nlangtag", "LanguageTagImpl::canonicalize: could not canonicalize '" << maBcp47 << "'");
    1214           9 :         if (pTag)
    1215             :         {
    1216           9 :             OUString aNew( OUString::createFromAscii( pTag));
    1217             :             // Make the lt_tag_t follow the new string if different, which
    1218             :             // removes default script and such.
    1219           9 :             if (maBcp47 != aNew)
    1220             :             {
    1221           2 :                 maBcp47 = aNew;
    1222           2 :                 bChanged = true;
    1223           2 :                 meIsIsoLocale = DECISION_DONTKNOW;
    1224           2 :                 meIsIsoODF = DECISION_DONTKNOW;
    1225           2 :                 if (!lt_tag_parse( mpImplLangtag, pTag, &aError.p))
    1226             :                 {
    1227             :                     SAL_WARN( "i18nlangtag", "LanguageTagImpl::canonicalize: could not reparse '" << maBcp47 << "'");
    1228           0 :                     free( pTag);
    1229           0 :                     meIsValid = DECISION_NO;
    1230           0 :                     return bChanged;
    1231             :                 }
    1232             :             }
    1233           9 :             free( pTag);
    1234           9 :             meIsValid = DECISION_YES;
    1235           9 :             return bChanged;
    1236             :         }
    1237             :     }
    1238             :     else
    1239             :     {
    1240             :         SAL_INFO( "i18nlangtag", "LanguageTagImpl::canonicalize: could not parse '" << maBcp47 << "'");
    1241             :     }
                           // Reached if parsing failed or lt_tag_canonicalize() returned NULL.
    1242           4 :     meIsValid = DECISION_NO;
    1243           4 :     return bChanged;
    1244             : }
    1245             : 
    1246             : 
    1247     1285497 : bool LanguageTagImpl::synCanonicalize()
    1248             : {
                           // Runs canonicalize() unless liblangtag was already ruled out
                           // (DECISION_NO) or an lt_tag_t already exists. Returns true if
                           // canonicalize() changed maBcp47.
    1249     1285497 :     bool bChanged = false;
    1250     1285497 :     if (meIsLiblangtagNeeded != DECISION_NO && !mpImplLangtag)
    1251             :     {
    1252      242681 :         bChanged = canonicalize();
    1253      242681 :         if (bChanged)
    1254             :         {
                                   // Re-derive only those representations that were already
                                   // initialized from the changed BCP 47 string.
    1255       24499 :             if (mbInitializedLocale)
    1256          75 :                 convertBcp47ToLocale();
    1257       24499 :             if (mbInitializedLangID)
    1258          75 :                 convertBcp47ToLang();
    1259             :         }
    1260             :     }
    1261     1285497 :     return bChanged;
    1262             : }
    1263             : 
    1264             : 
    1265      423001 : void LanguageTag::syncFromImpl()
    1266             : {
                           // Copies the impl's values into this object and, if this object's
                           // already initialized BCP 47 string or LangID differed from the
                           // impl's, obtains a new impl via registerImpl().
    1267      423001 :     ImplPtr xImpl = getImpl();
    1268      423001 :     LanguageTagImpl* pImpl = xImpl.get();
                           // Must be evaluated before syncVarsFromRawImpl() overwrites maBcp47
                           // and mnLangID with the impl's values.
    1269      423076 :     bool bRegister = ((mbInitializedBcp47 && maBcp47 != pImpl->maBcp47) ||
    1270      845927 :             (mbInitializedLangID && mnLangID != pImpl->mnLangID));
                           // NOTE(review): the log text below lacks the closing quote after
                           // maBcp47 (" and 0x" follows directly).
    1271             :     SAL_INFO_IF( bRegister, "i18nlangtag",
    1272             :             "LanguageTag::syncFromImpl: re-registering, '" << pImpl->maBcp47 << "' vs '" << maBcp47 <<
    1273             :             " and 0x" << ::std::hex << pImpl->mnLangID << " vs 0x" << ::std::hex << mnLangID);
    1274      423001 :     syncVarsFromRawImpl();
    1275      423001 :     if (bRegister)
    1276          75 :         mpImpl = registerImpl();
    1277      423001 : }
    1278             : 
    1279             : 
    1280     1244538 : void LanguageTag::syncVarsFromImpl() const
    1281             : {
                           // Mirrors the impl's values into this object's mutable members,
                           // obtaining the impl first if there is none yet.
    1282     1244538 :     if (!mpImpl)
    1283     1244538 :         getImpl();      // with side effect syncVarsFromRawImpl()
    1284             :     else
    1285           0 :         syncVarsFromRawImpl();
    1286     1244538 : }
    1287             : 
    1288             : 
    1289     2883158 : void LanguageTag::syncVarsFromRawImpl() const
    1290             : {
                           // Copies the initialized flags and values of BCP 47 string, locale
                           // and LangID from the current impl; does nothing if there is no impl.
    1291             :     // Do not use getImpl() here.
    1292     2883158 :     LanguageTagImpl* pImpl = mpImpl.get();
    1293     2883158 :     if (!pImpl)
    1294     2883158 :         return;
    1295             : 
    1296             :     // Obviously only mutable variables.
    1297     2883158 :     mbInitializedBcp47  = pImpl->mbInitializedBcp47;
    1298     2883158 :     maBcp47             = pImpl->maBcp47;
    1299     2883158 :     mbInitializedLocale = pImpl->mbInitializedLocale;
    1300     2883158 :     maLocale            = pImpl->maLocale;
    1301     2883158 :     mbInitializedLangID = pImpl->mbInitializedLangID;
    1302     2883158 :     mnLangID            = pImpl->mnLangID;
    1303             : }
    1304             : 
    1305             : 
    1306           0 : bool LanguageTag::synCanonicalize()
    1307             : {
                           // Forwards to the impl's synCanonicalize() and, if that reports a
                           // change, pulls the impl's values into this object. Returns whether
                           // anything changed.
    1308           0 :     bool bChanged = getImpl()->synCanonicalize();
    1309           0 :     if (bChanged)
    1310           0 :         syncFromImpl();
    1311           0 :     return bChanged;
    1312             : }
    1313             : 
    1314             : 
    1315        2532 : void LanguageTagImpl::convertLocaleToBcp47()
    1316             : {
                           // Derives maBcp47 from maLocale and flags the BCP 47 string as
                           // initialized, even if it ends up empty.
    1317        2532 :     if (mbSystemLocale && !mbInitializedLocale)
    1318           0 :         convertLangToLocale();
    1319             : 
    1320        2532 :     if (maLocale.Language.isEmpty())
    1321             :     {
    1322             :         // Do not call LanguageTag::convertToBcp47(Locale) that for an empty
    1323             :         // locale via LanguageTag::convertToBcp47(LanguageType) and
    1324             :         // LanguageTag::convertToLocale(LanguageType) would instantiate another
    1325             :         // LanguageTag.
    1326           0 :         maLocale = MsLangId::Conversion::convertLanguageToLocale( LANGUAGE_SYSTEM, true);
    1327             :     }
    1328        2532 :     if (maLocale.Language.isEmpty())
    1329             :     {
    1330           0 :         maBcp47 = OUString();   // bad luck
    1331             :     }
    1332        2532 :     else if (maLocale.Language == I18NLANGTAG_QLT)
    1333             :     {
                               // For the I18NLANGTAG_QLT language the Variant field holds the
                               // complete BCP 47 string.
    1334         384 :         maBcp47 = maLocale.Variant;
    1335         384 :         meIsIsoLocale = DECISION_NO;
    1336             :     }
    1337             :     else
    1338             :     {
    1339        2148 :         maBcp47 = LanguageTag::convertToBcp47( maLocale, true);
    1340             :     }
    1341        2532 :     mbInitializedBcp47 = true;
    1342        2532 : }
    1343             : 
    1344             : 
    1345      254411 : void LanguageTagImpl::convertLocaleToLang( bool bAllowOnTheFlyID )
    1346             : {
                           // Derives mnLangID from maLocale (or from the system language for a
                           // system locale) and flags the LangID as initialized.
                           // bAllowOnTheFlyID: if true and the locale maps to LANGUAGE_DONTKNOW,
                           // registerOnTheFly() is called for a valid BCP 47 tag; if false the
                           // LangID stays LANGUAGE_DONTKNOW.
    1347      254411 :     if (mbSystemLocale)
    1348             :     {
    1349           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1350             :     }
    1351             :     else
    1352             :     {
    1353      254411 :         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( maLocale);
    1354      254411 :         if (mnLangID == LANGUAGE_DONTKNOW && bAllowOnTheFlyID)
    1355             :         {
    1356        3532 :             if (isValidBcp47())
    1357             :             {
    1358             :                 // For language-only (including script) look if we know some
    1359             :                 // locale of that language and if so try to use the primary
    1360             :                 // language ID of that instead of generating an on-the-fly ID.
    1361        3530 :                 if (getCountry().isEmpty() && isIsoODF())
    1362             :                 {
    1363        3439 :                     lang::Locale aLoc( MsLangId::Conversion::lookupFallbackLocale( maLocale));
    1364             :                     // 'en-US' is last resort, do not use except when looking
    1365             :                     // for 'en'.
    1366        3439 :                     if (aLoc.Language != "en" || getLanguage() == "en")
    1367             :                     {
    1368        3437 :                         mnLangID = MsLangId::Conversion::convertLocaleToLanguage( aLoc);
    1369        3437 :                         if (mnLangID != LANGUAGE_DONTKNOW)
    1370        3437 :                             mnLangID = MsLangId::getPrimaryLanguage( mnLangID);
    1371        3439 :                     }
    1372             :                 }
                                       // mnLangID is passed on whether or not the fallback lookup
                                       // above found something.
                                       // NOTE(review): registerOnTheFly() is defined outside this
                                       // chunk; how it treats LANGUAGE_DONTKNOW is not visible here.
    1373        3530 :                 registerOnTheFly( mnLangID);
    1374             :             }
    1375             :             else
    1376             :             {
    1377             :                 SAL_WARN( "i18nlangtag", "LanguageTagImpl::convertLocaleToLang: with bAllowOnTheFlyID invalid '"
    1378             :                         << maBcp47 << "'");
    1379             :             }
    1380             :         }
    1381             :     }
    1382      254411 :     mbInitializedLangID = true;
    1383      254411 : }
    1384             : 
    1385             : 
    1386           0 : void LanguageTag::convertLocaleToLang()
    1387             : {
                           // Lets the impl derive the LangID from the locale, allowing
                           // on-the-fly IDs, then pulls the impl's values into this object.
    1388           0 :     getImpl()->convertLocaleToLang( true);
    1389           0 :     syncFromImpl();
    1390           0 : }
    1391             : 
    1392             : 
    1393      222596 : void LanguageTagImpl::convertBcp47ToLocale()
    1394             : {
                           // Derives maLocale from the BCP 47 tag and flags the locale as
                           // initialized.
    1395      222596 :     bool bIso = isIsoLocale();
    1396      222596 :     if (bIso)
    1397             :     {
                               // ISO locale: language and region only, no variant.
    1398      196713 :         maLocale.Language = getLanguageFromLangtag();
    1399      196713 :         maLocale.Country = getRegionFromLangtag();
    1400      196713 :         maLocale.Variant = OUString();
    1401             :     }
    1402             :     else
    1403             :     {
                               // Anything else: I18NLANGTAG_QLT as language and the complete
                               // BCP 47 string in Variant.
    1404       25883 :         maLocale.Language = I18NLANGTAG_QLT;
    1405       25883 :         maLocale.Country = getCountry();
    1406       25883 :         maLocale.Variant = maBcp47;
    1407             :     }
    1408      222596 :     mbInitializedLocale = true;
    1409      222596 : }
    1410             : 
    1411             : 
    1412           0 : void LanguageTag::convertBcp47ToLocale()
    1413             : {
                           // Lets the impl derive the locale from the BCP 47 string, then pulls
                           // the impl's values into this object.
    1414           0 :     getImpl()->convertBcp47ToLocale();
    1415           0 :     syncFromImpl();
    1416           0 : }
    1417             : 
    1418             : 
    1419          75 : void LanguageTagImpl::convertBcp47ToLang()
    1420             : {
                           // Derives mnLangID from the BCP 47 tag and flags the LangID as
                           // initialized.
    1421          75 :     if (mbSystemLocale)
    1422             :     {
    1423           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1424             :     }
    1425             :     else
    1426             :     {
                               // The conversion goes via the locale, which is derived first if
                               // it is not available yet; on-the-fly IDs are allowed.
    1427          75 :         if (!mbInitializedLocale)
    1428           0 :             convertBcp47ToLocale();
    1429          75 :         convertLocaleToLang( true);
    1430             :     }
    1431          75 :     mbInitializedLangID = true;
    1432          75 : }
    1433             : 
    1434             : 
    1435           0 : void LanguageTag::convertBcp47ToLang()
    1436             : {
                           // Lets the impl derive the LangID from the BCP 47 string, then pulls
                           // the impl's values into this object.
    1437           0 :     getImpl()->convertBcp47ToLang();
    1438           0 :     syncFromImpl();
    1439           0 : }
    1440             : 
    1441             : 
    1442        2532 : void LanguageTagImpl::convertLangToLocale()
    1443             : {
                           // Derives maLocale from mnLangID and flags the locale as initialized.
                           // For a system locale without LangID the real system language is
                           // determined first.
    1444        2532 :     if (mbSystemLocale && !mbInitializedLangID)
    1445             :     {
    1446           0 :         mnLangID = MsLangId::getRealLanguage( LANGUAGE_SYSTEM);
    1447           0 :         mbInitializedLangID = true;
    1448             :     }
    1449             :     // Resolve system here! The original is remembered as mbSystemLocale.
    1450        2532 :     maLocale = MsLangId::Conversion::convertLanguageToLocale( mnLangID, true);
    1451        2532 :     mbInitializedLocale = true;
    1452        2532 : }
    1453             : 
    1454             : 
    1455           0 : void LanguageTag::convertLangToLocale()
    1456             : {
                           // Lets the impl derive the locale from the LangID, then pulls the
                           // impl's values into this object.
    1457           0 :     getImpl()->convertLangToLocale();
    1458           0 :     syncFromImpl();
    1459           0 : }
    1460             : 
    1461             : 
    1462         131 : void LanguageTagImpl::convertLangToBcp47()
    1463             : {
                           // Derives maBcp47 from mnLangID in two steps, LangID to locale (if
                           // the locale is not available yet) and locale to BCP 47.
    1464         131 :     if (!mbInitializedLocale)
    1465         131 :         convertLangToLocale();
    1466         131 :     convertLocaleToBcp47();
                           // convertLocaleToBcp47() sets this flag as well.
    1467         131 :     mbInitializedBcp47 = true;
    1468         131 : }
    1469             : 
    1470             : 
    1471         137 : void LanguageTag::convertFromRtlLocale()
    1472             : {
                           // If maLocale carries a non-empty Variant, as an rtl_Locale may (see
                           // below), rebuilds the Unix locale string, converts it to an MS-LangID
                           // and discards maLocale. A locale without Variant is left untouched.
    1473             :     // The rtl_Locale follows the Open Group Base Specification,
    1474             :     // 8.2 Internationalization Variables
    1475             :     // language[_territory][.codeset][@modifier]
    1476             :     // On GNU/Linux systems usually being glibc locales.
    1477             :     // sal/osl/unx/nlsupport.c _parse_locale() parses them into
    1478             :     // Language: language               2 or 3 alpha code
    1479             :     // Country: [territory]             2 alpha code
    1480             :     // Variant: [.codeset][@modifier]
    1481             :     // Variant effectively contains anything that follows the territory, not
    1482             :     // looking for '.' dot delimiter or '@' modifier content.
    1483         137 :     if (!maLocale.Variant.isEmpty())
    1484             :     {
                               // The '_' separator is inserted even if Country is empty.
    1485         274 :         OString aStr = OUStringToOString( maLocale.Language + "_" + maLocale.Country + maLocale.Variant,
    1486         137 :                 RTL_TEXTENCODING_UTF8);
    1487             :         /* FIXME: let liblangtag parse this entirely with
    1488             :          * lt_tag_convert_from_locale() but that needs a patch to pass the
    1489             :          * string. */
    1490             : #if 0
    1491             :         myLtError aError;
    1492             :         theDataRef::get().incRef();
    1493             :         mpImplLangtag = lt_tag_convert_from_locale( aStr.getStr(), &aError.p);
    1494             :         maBcp47 = OStringToOUString( lt_tag_get_string( mpImplLangtag), RTL_TEXTENCODING_UTF8);
    1495             :         mbInitializedBcp47 = true;
    1496             : #else
    1497         137 :         mnLangID = MsLangId::convertUnxByteStringToLanguage( aStr);
    1498         137 :         if (mnLangID == LANGUAGE_DONTKNOW)
    1499             :         {
    1500             :             SAL_WARN( "i18nlangtag", "LanguageTag(rtl_Locale) - unknown: " << aStr);
    1501           0 :             mnLangID = LANGUAGE_ENGLISH_US;     // we need _something_ here
    1502             :         }
    1503         137 :         mbInitializedLangID = true;
    1504             : #endif
                               // From here on only the LangID (or, in the disabled branch, the
                               // BCP 47 string) represents the tag; the locale is cleared.
    1505         137 :         maLocale = lang::Locale();
    1506         137 :         mbInitializedLocale = false;
    1507             :     }
    1508         137 : }
    1509             : 
    1510             : 
    1511      249118 : const OUString & LanguageTagImpl::getBcp47() const
    1512             : {
                           // Returns maBcp47, deriving it first if it is not initialized yet:
                           // from the locale if that is initialized, otherwise from the LangID.
                           // The const_cast is needed because the conversions modify members of
                           // this const object.
    1513      249118 :     if (!mbInitializedBcp47)
    1514             :     {
    1515         131 :         if (mbInitializedLocale)
    1516           0 :             const_cast<LanguageTagImpl*>(this)->convertLocaleToBcp47();
    1517             :         else
    1518         131 :             const_cast<LanguageTagImpl*>(this)->convertLangToBcp47();
    1519             :     }
    1520      249118 :     return maBcp47;
    1521             : }
    1522             : 
    1523             : 
    1524      256591 : const OUString & LanguageTag::getBcp47( bool bResolveSystem ) const
    1525             : {
                           // Returns the BCP 47 string of this tag.
                           // bResolveSystem: if false and this is the system locale, the result
                           // of theEmptyBcp47::get() is returned instead (presumably an empty
                           // string; theEmptyBcp47 is defined outside this chunk).
    1526      256591 :     if (!bResolveSystem && mbSystemLocale)
    1527        3826 :         return theEmptyBcp47::get();
                           // First try the values already present in the impl, only then let
                           // the impl derive the string.
    1528      252765 :     if (!mbInitializedBcp47)
    1529      200549 :         syncVarsFromImpl();
    1530      252765 :     if (!mbInitializedBcp47)
    1531             :     {
    1532         125 :         getImpl()->getBcp47();
    1533         125 :         const_cast<LanguageTag*>(this)->syncFromImpl();
    1534             :     }
    1535      252765 :     return maBcp47;
    1536             : }
    1537             : 
    1538             : 
    1539      395926 : OUString LanguageTagImpl::getLanguageFromLangtag()
    1540             : {
                           // Returns the language subtag of the tag, or an empty string if
                           // maBcp47 is empty or no language could be obtained. Runs
                           // synCanonicalize() first.
    1541      395926 :     OUString aLanguage;
    1542      395926 :     synCanonicalize();
    1543      395926 :     if (maBcp47.isEmpty())
    1544           0 :         return aLanguage;
    1545      395926 :     if (mpImplLangtag)
    1546             :     {
                               // A liblangtag tag object exists, query it.
    1547          10 :         const lt_lang_t* pLangT = lt_tag_get_language( mpImplLangtag);
    1548             :         SAL_WARN_IF( !pLangT, "i18nlangtag",
    1549             :                 "LanguageTag::getLanguageFromLangtag: pLangT==NULL for '" << maBcp47 << "'");
    1550          10 :         if (!pLangT)
    1551           5 :             return aLanguage;
    1552           5 :         const char* pLang = lt_lang_get_tag( pLangT);
    1553             :         SAL_WARN_IF( !pLang, "i18nlangtag",
    1554             :                 "LanguageTag::getLanguageFromLangtag: pLang==NULL for '" << maBcp47 << "'");
    1555           5 :         if (pLang)
    1556           5 :             aLanguage = OUString::createFromAscii( pLang);
    1557             :     }
    1558             :     else
    1559             :     {
                               // No liblangtag object: use the cached value.
                               // NOTE(review): cacheSimpleLSCV() is defined outside this chunk;
                               // presumably it fills the maCached* members - confirm.
    1560      395916 :         if (mbCachedLanguage || cacheSimpleLSCV())
    1561      395914 :             aLanguage = maCachedLanguage;
    1562             :     }
    1563      395921 :     return aLanguage;
    1564             : }
    1565             : 
    1566             : 
    1567           9 : OUString LanguageTagImpl::getScriptFromLangtag()
    1568             : {
                           // Returns the script subtag of the tag, or an empty string if
                           // maBcp47 is empty or no script could be obtained. Runs
                           // synCanonicalize() first.
    1569           9 :     OUString aScript;
    1570           9 :     synCanonicalize();
    1571           9 :     if (maBcp47.isEmpty())
    1572           0 :         return aScript;
    1573           9 :     if (mpImplLangtag)
    1574             :     {
                               // A liblangtag tag object exists, query it.
    1575           8 :         const lt_script_t* pScriptT = lt_tag_get_script( mpImplLangtag);
    1576             :         // pScriptT==NULL is valid for default scripts
    1577           8 :         if (!pScriptT)
    1578           8 :             return aScript;
    1579           0 :         const char* pScript = lt_script_get_tag( pScriptT);
    1580             :         SAL_WARN_IF( !pScript, "i18nlangtag", "LanguageTag::getScriptFromLangtag: pScript==NULL");
    1581           0 :         if (pScript)
    1582           0 :             aScript = OUString::createFromAscii( pScript);
    1583             :     }
    1584             :     else
    1585             :     {
                               // No liblangtag object: use the cached value.
                               // NOTE(review): cacheSimpleLSCV() is defined outside this chunk;
                               // presumably it fills the maCached* members - confirm.
    1586           1 :         if (mbCachedScript || cacheSimpleLSCV())
    1587           1 :             aScript = maCachedScript;
    1588             :     }
    1589           1 :     return aScript;
    1590             : }
    1591             : 
    1592             : 
    1593      421297 : OUString LanguageTagImpl::getRegionFromLangtag()
    1594             : {
                           // Returns the region subtag of the tag, or an empty string if
                           // maBcp47 is empty or no region could be obtained. Runs
                           // synCanonicalize() first.
    1595      421297 :     OUString aRegion;
    1596      421297 :     synCanonicalize();
    1597      421297 :     if (maBcp47.isEmpty())
    1598           0 :         return aRegion;
    1599      421297 :     if (mpImplLangtag)
    1600             :     {
                               // A liblangtag tag object exists, query it.
    1601          14 :         const lt_region_t* pRegionT = lt_tag_get_region( mpImplLangtag);
    1602             :         // pRegionT==NULL is valid for language only tags, rough check here
    1603             :         // that does not take sophisticated tags into account that actually
    1604             :         // should have a region, check for ll, lll, ll-Ssss and lll-Ssss so
    1605             :         // that ll-CC and lll-CC actually fail.
    1606             :         SAL_WARN_IF( !pRegionT &&
    1607             :                 maBcp47.getLength() != 2 && maBcp47.getLength() != 3 &&
    1608             :                 maBcp47.getLength() != 7 && maBcp47.getLength() != 8,
    1609             :                 "i18nlangtag", "LanguageTag::getRegionFromLangtag: pRegionT==NULL for '" << maBcp47 << "'");
    1610          14 :         if (!pRegionT)
    1611          14 :             return aRegion;
    1612           0 :         const char* pRegion = lt_region_get_tag( pRegionT);
    1613             :         SAL_WARN_IF( !pRegion, "i18nlangtag",
    1614             :                 "LanguageTag::getRegionFromLangtag: pRegion==NULL for'" << maBcp47 << "'");
    1615           0 :         if (pRegion)
    1616           0 :             aRegion = OUString::createFromAscii( pRegion);
    1617             :     }
    1618             :     else
    1619             :     {
                               // No liblangtag object: use the cached country.
                               // NOTE(review): cacheSimpleLSCV() is defined outside this chunk;
                               // presumably it fills the maCached* members - confirm.
    1620      421283 :         if (mbCachedCountry || cacheSimpleLSCV())
    1621      421280 :             aRegion = maCachedCountry;
    1622             :     }
    1623      421283 :     return aRegion;
    1624             : }
    1625             : 
    1626             : 
    1627           1 : OUString LanguageTagImpl::getVariantsFromLangtag()
    1628             : {
                           // Returns all variant subtags of the tag joined with '-', or an
                           // empty string if maBcp47 is empty or there are no variants. Runs
                           // synCanonicalize() first.
    1629           1 :     OUString aVariants;
    1630           1 :     synCanonicalize();
    1631           1 :     if (maBcp47.isEmpty())
    1632           0 :         return aVariants;
    1633           1 :     if (mpImplLangtag)
    1634             :     {
                               // Walk liblangtag's variant list; entries without value or tag
                               // string are skipped.
    1635           1 :         const lt_list_t* pVariantsT = lt_tag_get_variants( mpImplLangtag);
    1636           2 :         for (const lt_list_t* pE = pVariantsT; pE; pE = lt_list_next( pE))
    1637             :         {
    1638           1 :             const lt_variant_t* pVariantT = static_cast<const lt_variant_t*>(lt_list_value( pE));
    1639           1 :             if (pVariantT)
    1640             :             {
    1641           1 :                 const char* p = lt_variant_get_tag( pVariantT);
    1642           1 :                 if (p)
    1643             :                 {
    1644           1 :                     if (aVariants.isEmpty())
    1645           1 :                         aVariants = OUString::createFromAscii( p);
    1646             :                     else
    1647           0 :                         aVariants += "-" + OUString::createFromAscii( p);
    1648             :                 }
    1649             :             }
    1650             :         }
    1651             :     }
    1652             :     else
    1653             :     {
                               // No liblangtag object: use the cached value.
                               // NOTE(review): cacheSimpleLSCV() is defined outside this chunk;
                               // presumably it fills the maCached* members - confirm.
    1654           0 :         if (mbCachedVariants || cacheSimpleLSCV())
    1655           0 :             aVariants = maCachedVariants;
    1656             :     }
    1657           1 :     return aVariants;
    1658             : }
    1659             : 
    1660             : 
    1661     2452348 : const com::sun::star::lang::Locale & LanguageTag::getLocale( bool bResolveSystem ) const
    1662             : {
    1663     2452348 :     if (!bResolveSystem && mbSystemLocale)
    1664         276 :         return theEmptyLocale::get();
    1665     2452072 :     if (!mbInitializedLocale)
    1666      547446 :         syncVarsFromImpl();
    1667     2452072 :     if (!mbInitializedLocale)
    1668             :     {
    1669           0 :         if (mbInitializedBcp47)
    1670           0 :             const_cast<LanguageTag*>(this)->convertBcp47ToLocale();
    1671             :         else
    1672           0 :             const_cast<LanguageTag*>(this)->convertLangToLocale();
    1673             :     }
    1674     2452072 :     return maLocale;
    1675             : }
    1676             : 
    1677             : 
    1678    10150540 : LanguageType LanguageTag::getLanguageType( bool bResolveSystem ) const
    1679             : {
    1680    10150540 :     if (!bResolveSystem && mbSystemLocale)
    1681     1890126 :         return LANGUAGE_SYSTEM;
    1682     8260414 :     if (!mbInitializedLangID)
    1683      496543 :         syncVarsFromImpl();
    1684     8260414 :     if (!mbInitializedLangID)
    1685             :     {
    1686           0 :         if (mbInitializedBcp47)
    1687           0 :             const_cast<LanguageTag*>(this)->convertBcp47ToLang();
    1688             :         else
    1689             :         {
    1690           0 :             const_cast<LanguageTag*>(this)->convertLocaleToLang();
    1691             : 
    1692             :             /* Resolve a locale only unknown due to some redundant information,
    1693             :              * like 'de-Latn-DE' with script tag. Never call canonicalize()
    1694             :              * from within convert...() methods due to possible recursion, so
    1695             :              * do it here. */
    1696           0 :             if ((!mbSystemLocale && mnLangID == LANGUAGE_SYSTEM) || mnLangID == LANGUAGE_DONTKNOW)
    1697           0 :                 const_cast<LanguageTag*>(this)->synCanonicalize();
    1698             :         }
    1699             :     }
    1700     8260414 :     return mnLangID;
    1701             : }
    1702             : 
    1703             : 
    1704           0 : void LanguageTag::getIsoLanguageScriptCountry( OUString& rLanguage, OUString& rScript, OUString& rCountry ) const
    1705             : {
    1706             :     // Calling isIsoODF() first is a predicate for getLanguage(), getScript()
    1707             :     // and getCountry() to work correctly in this context.
    1708           0 :     if (isIsoODF())
    1709             :     {
    1710           0 :         rLanguage = getLanguage();
    1711           0 :         rScript   = getScript();
    1712           0 :         rCountry  = getCountry();
    1713             :     }
    1714             :     else
    1715             :     {
    1716           0 :         rLanguage = (LanguageTag::isIsoLanguage( getLanguage()) ? getLanguage() : OUString());
    1717           0 :         rScript   = (LanguageTag::isIsoScript(   getScript())   ? getScript()   : OUString());
    1718           0 :         rCountry  = (LanguageTag::isIsoCountry(  getCountry())  ? getCountry()  : OUString());
    1719             :     }
    1720           0 : }
    1721             : 
    1722             : 
    1723             : namespace
    1724             : {
    1725             : 
    1726      459452 : inline bool isLowerAscii( sal_Unicode c )
    1727             : {
    1728      459452 :     return 'a' <= c && c <= 'z';
    1729             : }
    1730             : 
    1731      394854 : inline bool isUpperAscii( sal_Unicode c )
    1732             : {
    1733      394854 :     return 'A' <= c && c <= 'Z';
    1734             : }
    1735             : 
    1736             : }
    1737             : 
    1738             : 
    1739             : // static
    1740      198705 : bool LanguageTag::isIsoLanguage( const OUString& rLanguage )
    1741             : {
    1742             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1743             :     bool b2chars;
    1744      658167 :     if (((b2chars = (rLanguage.getLength() == 2)) || rLanguage.getLength() == 3) &&
    1745      794787 :             isLowerAscii( rLanguage[0]) && isLowerAscii( rLanguage[1]) &&
    1746       62052 :             (b2chars || isLowerAscii( rLanguage[2])))
    1747      198694 :         return true;
    1748             :     SAL_WARN_IF( ((rLanguage.getLength() == 2 || rLanguage.getLength() == 3) &&
    1749             :                 (isUpperAscii( rLanguage[0]) || isUpperAscii( rLanguage[1]))) ||
    1750             :             (rLanguage.getLength() == 3 && isUpperAscii( rLanguage[2])), "i18nlangtag",
    1751             :             "LanguageTag::isIsoLanguage: rejecting upper case " << rLanguage);
    1752          11 :     return false;
    1753             : }
    1754             : 
    1755             : 
    1756             : // static
    1757      224584 : bool LanguageTag::isIsoCountry( const OUString& rRegion )
    1758             : {
    1759             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1760      646593 :     if (rRegion.isEmpty() ||
    1761      394850 :             (rRegion.getLength() == 2 && isUpperAscii( rRegion[0]) && isUpperAscii( rRegion[1])))
    1762      224584 :         return true;
    1763             :     SAL_WARN_IF( rRegion.getLength() == 2 && (isLowerAscii( rRegion[0]) || isLowerAscii( rRegion[1])),
    1764             :             "i18nlangtag", "LanguageTag::isIsoCountry: rejecting lower case " << rRegion);
    1765           0 :     return false;
    1766             : }
    1767             : 
    1768             : 
    1769             : // static
    1770        3472 : bool LanguageTag::isIsoScript( const OUString& rScript )
    1771             : {
    1772             :     /* TODO: ignore case? For now let's see where rubbish is used. */
    1773        6948 :     if (rScript.isEmpty() ||
    1774           8 :             (rScript.getLength() == 4 &&
    1775          12 :              isUpperAscii( rScript[0]) && isLowerAscii( rScript[1]) &&
    1776           8 :              isLowerAscii( rScript[2]) && isLowerAscii( rScript[3])))
    1777        3472 :         return true;
    1778             :     SAL_WARN_IF( rScript.getLength() == 4 &&
    1779             :             (isLowerAscii( rScript[0]) || isUpperAscii( rScript[1]) ||
    1780             :              isUpperAscii( rScript[2]) || isUpperAscii( rScript[3])),
    1781             :             "i18nlangtag", "LanguageTag::isIsoScript: rejecting case mismatch " << rScript);
    1782           0 :     return false;
    1783             : }
    1784             : 
    1785             : 
    1786      201022 : OUString LanguageTagImpl::getLanguage() const
    1787             : {
    1788      201022 :     if (!mbCachedLanguage)
    1789             :     {
    1790      199213 :         maCachedLanguage = const_cast<LanguageTagImpl*>(this)->getLanguageFromLangtag();
    1791      199213 :         mbCachedLanguage = true;
    1792             :     }
    1793      201022 :     return maCachedLanguage;
    1794             : }
    1795             : 
    1796             : 
    1797     1383130 : OUString LanguageTag::getLanguage() const
    1798             : {
    1799     1383130 :     ImplPtr pImpl = getImpl();
    1800     1383130 :     if (pImpl->mbCachedLanguage)
    1801     1380815 :         return pImpl->maCachedLanguage;
    1802        4630 :     OUString aRet( pImpl->getLanguage());
    1803        2315 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1804     1385445 :     return aRet;
    1805             : }
    1806             : 
    1807             : 
    1808        3472 : OUString LanguageTagImpl::getScript() const
    1809             : {
    1810        3472 :     if (!mbCachedScript)
    1811             :     {
    1812           9 :         maCachedScript = const_cast<LanguageTagImpl*>(this)->getScriptFromLangtag();
    1813           9 :         mbCachedScript = true;
    1814             :     }
    1815        3472 :     return maCachedScript;
    1816             : }
    1817             : 
    1818             : 
    1819       49555 : OUString LanguageTag::getScript() const
    1820             : {
    1821       49555 :     ImplPtr pImpl = getImpl();
    1822       49555 :     if (pImpl->mbCachedScript)
    1823       49555 :         return pImpl->maCachedScript;
    1824           0 :     OUString aRet( pImpl->getScript());
    1825           0 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1826       49555 :     return aRet;
    1827             : }
    1828             : 
    1829             : 
    1830       11641 : OUString LanguageTag::getLanguageAndScript() const
    1831             : {
    1832       11641 :     OUString aLanguageScript( getLanguage());
    1833       23282 :     OUString aScript( getScript());
    1834       11641 :     if (!aScript.isEmpty())
    1835             :     {
    1836           3 :         aLanguageScript += "-" + aScript;
    1837             :     }
    1838       23282 :     return aLanguageScript;
    1839             : }
    1840             : 
    1841             : 
    1842       29418 : OUString LanguageTagImpl::getCountry() const
    1843             : {
    1844       29418 :     if (!mbCachedCountry)
    1845             :     {
    1846       25890 :         maCachedCountry = const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
    1847       25890 :         if (!LanguageTag::isIsoCountry( maCachedCountry))
    1848           0 :             maCachedCountry = OUString();
    1849       25890 :         mbCachedCountry = true;
    1850             :     }
    1851       29418 :     return maCachedCountry;
    1852             : }
    1853             : 
    1854             : 
    1855      419959 : OUString LanguageTag::getCountry() const
    1856             : {
    1857      419959 :     ImplPtr pImpl = getImpl();
    1858      419959 :     if (pImpl->mbCachedCountry)
    1859      419954 :         return pImpl->maCachedCountry;
    1860          10 :     OUString aRet( pImpl->getCountry());
    1861           5 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1862      419964 :     return aRet;
    1863             : }
    1864             : 
    1865             : 
    1866      198694 : OUString LanguageTagImpl::getRegion() const
    1867             : {
    1868      198694 :     return const_cast<LanguageTagImpl*>(this)->getRegionFromLangtag();
    1869             : }
    1870             : 
    1871             : 
    1872           5 : OUString LanguageTagImpl::getVariants() const
    1873             : {
    1874           5 :     if (!mbCachedVariants)
    1875             :     {
    1876           1 :         maCachedVariants = const_cast<LanguageTagImpl*>(this)->getVariantsFromLangtag();
    1877           1 :         mbCachedVariants = true;
    1878             :     }
    1879           5 :     return maCachedVariants;
    1880             : }
    1881             : 
    1882             : 
    1883       40192 : OUString LanguageTag::getVariants() const
    1884             : {
    1885       40192 :     ImplPtr pImpl = getImpl();
    1886       40192 :     if (pImpl->mbCachedVariants)
    1887       40192 :         return pImpl->maCachedVariants;
    1888           0 :     OUString aRet( pImpl->getVariants());
    1889           0 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1890       40192 :     return aRet;
    1891             : }
    1892             : 
    1893             : 
    1894           0 : OUString LanguageTag::getGlibcLocaleString( const OUString & rEncoding ) const
    1895             : {
    1896           0 :     OUString aRet;
    1897           0 :     if (isIsoLocale())
    1898             :     {
    1899           0 :         OUString aCountry( getCountry());
    1900           0 :         if (aCountry.isEmpty())
    1901           0 :             aRet = getLanguage() + rEncoding;
    1902             :         else
    1903           0 :             aRet = getLanguage() + "_" + aCountry + rEncoding;
    1904             :     }
    1905             :     else
    1906             :     {
    1907             :         /* FIXME: use the aImplIsoLangGLIBCModifiersEntries table from
    1908             :          * i18nlangtag/source/isolang/isolang.cxx or let liblangtag handle it.
    1909             :          * So far no code was prepared for anything else than a simple
    1910             :          * language_country locale so we don't loose anything here right now.
    1911             :          * */
    1912             :     }
    1913           0 :     return aRet;
    1914             : }
    1915             : 
    1916             : 
    1917       40486 : bool LanguageTagImpl::hasScript() const
    1918             : {
    1919       40486 :     if (!mbCachedScript)
    1920           0 :         getScript();
    1921       40486 :     return !maCachedScript.isEmpty();
    1922             : }
    1923             : 
    1924             : 
    1925       40486 : bool LanguageTag::hasScript() const
    1926             : {
    1927       40486 :     bool bRet = getImpl()->hasScript();
    1928       40486 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1929       40486 :     return bRet;
    1930             : }
    1931             : 
    1932             : 
    1933      225012 : bool LanguageTagImpl::cacheSimpleLSCV()
    1934             : {
    1935      450024 :     OUString aLanguage, aScript, aCountry, aVariants;
    1936      225012 :     Extraction eExt = simpleExtract( maBcp47, aLanguage, aScript, aCountry, aVariants);
    1937      225012 :     bool bRet = (eExt == EXTRACTED_LSC || eExt == EXTRACTED_LV);
    1938      225012 :     if (bRet)
    1939             :     {
    1940      225007 :         maCachedLanguage = aLanguage;
    1941      225007 :         maCachedScript   = aScript;
    1942      225007 :         maCachedCountry  = aCountry;
    1943      225007 :         maCachedVariants = aVariants;
    1944      225007 :         mbCachedLanguage = mbCachedScript = mbCachedCountry = mbCachedVariants = true;
    1945             :     }
    1946      450024 :     return bRet;
    1947             : }
    1948             : 
    1949             : 
    1950      599062 : bool LanguageTagImpl::isIsoLocale() const
    1951             : {
    1952      599062 :     if (meIsIsoLocale == DECISION_DONTKNOW)
    1953             :     {
    1954      224572 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    1955             :         // It must be at most ll-CC or lll-CC
    1956             :         // Do not use getCountry() here, use getRegion() instead.
    1957      449144 :         meIsIsoLocale = ((maBcp47.isEmpty() ||
    1958     1243912 :                     (maBcp47.getLength() <= 6 && LanguageTag::isIsoLanguage( getLanguage()) &&
    1959     1071088 :                      LanguageTag::isIsoCountry( getRegion()))) ? DECISION_YES : DECISION_NO);
    1960             :     }
    1961      599062 :     return meIsIsoLocale == DECISION_YES;
    1962             : }
    1963             : 
    1964             : 
    1965      372999 : bool LanguageTag::isIsoLocale() const
    1966             : {
    1967      372999 :     bool bRet = getImpl()->isIsoLocale();
    1968      372999 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1969      372999 :     return bRet;
    1970             : }
    1971             : 
    1972             : 
    1973        3687 : bool LanguageTagImpl::isIsoODF() const
    1974             : {
    1975        3687 :     if (meIsIsoODF == DECISION_DONTKNOW)
    1976             :     {
    1977        3467 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    1978        3467 :         if (!LanguageTag::isIsoScript( getScript()))
    1979           0 :             return ((meIsIsoODF = DECISION_NO) == DECISION_YES);
    1980             :         // The usual case is lll-CC so simply check that first.
    1981        3467 :         if (isIsoLocale())
    1982        3455 :             return ((meIsIsoODF = DECISION_YES) == DECISION_YES);
    1983             :         // If this is not ISO locale for which script must not exist it can
    1984             :         // still be ISO locale plus ISO script lll-Ssss-CC, but not ll-vvvv ...
    1985             :         // ll-vvvvvvvv
    1986          54 :         meIsIsoODF = ((maBcp47.getLength() <= 11 && LanguageTag::isIsoLanguage( getLanguage()) &&
    1987          37 :                     LanguageTag::isIsoCountry( getRegion()) && LanguageTag::isIsoScript( getScript()) &&
    1988          38 :                     getVariants().isEmpty()) ? DECISION_YES : DECISION_NO);
    1989             :     }
    1990         232 :     return meIsIsoODF == DECISION_YES;
    1991             : }
    1992             : 
    1993             : 
    1994         241 : bool LanguageTag::isIsoODF() const
    1995             : {
    1996         241 :     bool bRet = getImpl()->isIsoODF();
    1997         241 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    1998         241 :     return bRet;
    1999             : }
    2000             : 
    2001             : 
    2002        4050 : bool LanguageTagImpl::isValidBcp47() const
    2003             : {
    2004        4050 :     if (meIsValid == DECISION_DONTKNOW)
    2005             :     {
    2006           0 :         const_cast<LanguageTagImpl*>(this)->synCanonicalize();
    2007             :         SAL_WARN_IF( meIsValid == DECISION_DONTKNOW, "i18nlangtag",
    2008             :                 "LanguageTag::isValidBcp47: canonicalize() didn't set meIsValid");
    2009             :     }
    2010        4050 :     return meIsValid == DECISION_YES;
    2011             : }
    2012             : 
    2013             : 
    2014         518 : bool LanguageTag::isValidBcp47() const
    2015             : {
    2016         518 :     bool bRet = getImpl()->isValidBcp47();
    2017         518 :     const_cast<LanguageTag*>(this)->syncFromImpl();
    2018         518 :     return bRet;
    2019             : }
    2020             : 
    2021             : 
    2022     6193426 : bool LanguageTag::isSystemLocale() const
    2023             : {
    2024     6193426 :     return mbSystemLocale;
    2025             : }
    2026             : 
    2027             : 
    2028        1683 : LanguageTag & LanguageTag::makeFallback()
    2029             : {
    2030        1683 :     if (!mbIsFallback)
    2031             :     {
    2032        1683 :         const lang::Locale& rLocale1 = getLocale( true);
    2033        1683 :         lang::Locale aLocale2( MsLangId::Conversion::lookupFallbackLocale( rLocale1));
    2034        5049 :         if (    rLocale1.Language != aLocale2.Language ||
    2035        3257 :                 rLocale1.Country  != aLocale2.Country ||
    2036        1574 :                 rLocale1.Variant  != aLocale2.Variant)
    2037             :         {
    2038         109 :             if (rLocale1.Language != "en" && aLocale2.Language == "en" && aLocale2.Country == "US")
    2039             :             {
    2040             :                 // "en-US" is the last resort fallback, try if we get a better
    2041             :                 // one for the fallback hierarchy of a non-"en" locale.
    2042           0 :                 ::std::vector< OUString > aFallbacks( getFallbackStrings( false));
    2043           0 :                 for (::std::vector< OUString >::const_iterator it( aFallbacks.begin()); it != aFallbacks.end(); ++it)
    2044             :                 {
    2045           0 :                     lang::Locale aLocale3( LanguageTag( *it).getLocale());
    2046           0 :                     aLocale2 = MsLangId::Conversion::lookupFallbackLocale( aLocale3);
    2047           0 :                     if (aLocale2.Language != "en" || aLocale2.Country != "US")
    2048           0 :                         break;  // for, success
    2049           0 :                 }
    2050             :             }
    2051             :             SAL_INFO( "i18nlangtag", "LanguageTag::makeFallback - for (" <<
    2052             :                     rLocale1.Language << "," << rLocale1.Country << "," << rLocale1.Variant << ") to (" <<
    2053             :                     aLocale2.Language << "," << aLocale2.Country << "," << aLocale2.Variant << ")");
    2054         109 :             reset( aLocale2);
    2055             :         }
    2056        1683 :         mbIsFallback = true;
    2057             :     }
    2058        1683 :     return *this;
    2059             : }
    2060             : 
    2061             : 
    2062             : /* TODO: maybe this now could take advantage of the mnOverride field in
    2063             :  * isolang.cxx entries and search for kSAME instead of hardcoded special
    2064             :  * fallbacks. Though iterating through those tables would be slower and even
    2065             :  * then there would be some special cases, but we wouldn't lack entries that
    2066             :  * were missed out. */
    2067      290340 : ::std::vector< OUString > LanguageTag::getFallbackStrings( bool bIncludeFullBcp47 ) const
    2068             : {
    2069      290340 :     ::std::vector< OUString > aVec;
    2070      580680 :     OUString aLanguage( getLanguage());
    2071      580680 :     OUString aCountry( getCountry());
    2072      290340 :     if (isIsoLocale())
    2073             :     {
    2074      250150 :         if (!aCountry.isEmpty())
    2075             :         {
    2076      210849 :             if (bIncludeFullBcp47)
    2077      200153 :                 aVec.push_back( aLanguage + "-" + aCountry);
    2078      210849 :             if (aLanguage == "zh")
    2079             :             {
    2080             :                 // For zh-HK or zh-MO also list zh-TW, for all other zh-XX also
    2081             :                 // list zh-CN.
    2082        2736 :                 if (aCountry == "HK" || aCountry == "MO")
    2083         912 :                     aVec.push_back( aLanguage + "-TW");
    2084        1824 :                 else if (aCountry != "CN")
    2085         912 :                     aVec.push_back( aLanguage + "-CN");
    2086        2736 :                 aVec.push_back( aLanguage);
    2087             :             }
    2088      208113 :             else if (aLanguage == "sh")
    2089             :             {
    2090             :                 // Manual list instead of calling
    2091             :                 // LanguageTag( "sr-Latn-" + aCountry).getFallbackStrings( true)
    2092             :                 // that would also include "sh-*" again.
    2093           0 :                 aVec.push_back( "sr-Latn-" + aCountry);
    2094           0 :                 aVec.push_back( "sr-Latn");
    2095           0 :                 aVec.push_back( "sh");  // legacy with script, before default script with country
    2096           0 :                 aVec.push_back( "sr-" + aCountry);
    2097           0 :                 aVec.push_back( "sr");
    2098             :             }
    2099      208113 :             else if (aLanguage == "ca" && aCountry == "XV")
    2100             :             {
    2101           0 :                 ::std::vector< OUString > aRep( LanguageTag( "ca-ES-valencia").getFallbackStrings( true));
    2102           0 :                 aVec.insert( aVec.end(), aRep.begin(), aRep.end());
    2103             :                 // Already includes 'ca' language fallback.
    2104             :             }
    2105      208113 :             else if (aLanguage == "ku")
    2106             :             {
    2107           0 :                 if (aCountry == "TR" || aCountry == "SY")
    2108             :                 {
    2109           0 :                     aVec.push_back( "kmr-Latn-" + aCountry);
    2110           0 :                     aVec.push_back( "kmr-" + aCountry);
    2111           0 :                     aVec.push_back( "kmr-Latn");
    2112           0 :                     aVec.push_back( "kmr");
    2113           0 :                     aVec.push_back( aLanguage);
    2114             :                 }
    2115           0 :                 else if (aCountry == "IQ" || aCountry == "IR")
    2116             :                 {
    2117           0 :                     aVec.push_back( "ckb-" + aCountry);
    2118           0 :                     aVec.push_back( "ckb");
    2119             :                 }
    2120             :             }
    2121      208113 :             else if (aLanguage == "kmr" && (aCountry == "TR" || aCountry == "SY"))
    2122             :             {
    2123           0 :                 aVec.push_back( "ku-Latn-" + aCountry);
    2124           0 :                 aVec.push_back( "ku-" + aCountry);
    2125           0 :                 aVec.push_back( aLanguage);
    2126           0 :                 aVec.push_back( "ku");
    2127             :             }
    2128      208113 :             else if (aLanguage == "ckb" && (aCountry == "IQ" || aCountry == "IR"))
    2129             :             {
    2130        2736 :                 aVec.push_back( "ku-Arab-" + aCountry);
    2131        2736 :                 aVec.push_back( "ku-" + aCountry);
    2132        2736 :                 aVec.push_back( aLanguage);
    2133             :                 // not 'ku' only, that was used for Latin script
    2134             :             }
    2135             :             else
    2136      205377 :                 aVec.push_back( aLanguage);
    2137             :         }
    2138             :         else
    2139             :         {
    2140       39301 :             if (bIncludeFullBcp47)
    2141       39301 :                 aVec.push_back( aLanguage);
    2142       39301 :             if (aLanguage == "sh")
    2143             :             {
    2144           0 :                 aVec.push_back( "sr-Latn");
    2145           0 :                 aVec.push_back( "sr");
    2146             :             }
    2147       39301 :             else if (aLanguage == "pli")
    2148             :             {
    2149             :                 // a special case for Pali dictionary, see fdo#41599
    2150           0 :                 aVec.push_back( "pi-Latn");
    2151           0 :                 aVec.push_back( "pi");
    2152             :             }
    2153             :         }
    2154      250150 :         return aVec;
    2155             :     }
    2156             : 
    2157       40190 :     getBcp47();     // have maBcp47 now
    2158       40190 :     if (bIncludeFullBcp47)
    2159       40190 :         aVec.push_back( maBcp47);
    2160       80380 :     OUString aScript;
    2161       80380 :     OUString aVariants( getVariants());
    2162       80380 :     OUString aTmp;
    2163       40190 :     if (hasScript())
    2164             :     {
    2165       37905 :         aScript = getScript();
    2166       37905 :         bool bHaveLanguageScriptVariant = false;
    2167       37905 :         if (!aCountry.isEmpty())
    2168             :         {
    2169       25573 :             if (!aVariants.isEmpty())
    2170             :             {
    2171           0 :                 aTmp = aLanguage + "-" + aScript + "-" + aCountry + "-" + aVariants;
    2172           0 :                 if (aTmp != maBcp47)
    2173           0 :                     aVec.push_back( aTmp);
    2174             :                 // Language with variant but without country before language
    2175             :                 // without variant but with country.
    2176           0 :                 aTmp = aLanguage + "-" + aScript + "-" + aVariants;
    2177           0 :                 if (aTmp != maBcp47)
    2178           0 :                     aVec.push_back( aTmp);
    2179           0 :                 bHaveLanguageScriptVariant = true;
    2180             :             }
    2181       25573 :             aTmp = aLanguage + "-" + aScript + "-" + aCountry;
    2182       25573 :             if (aTmp != maBcp47)
    2183           0 :                 aVec.push_back( aTmp);
    2184       25573 :             if (aLanguage == "sr" && aScript == "Latn")
    2185             :             {
    2186             :                 // sr-Latn-CS => sr-Latn-YU, sh-CS, sh-YU
    2187        6386 :                 if (aCountry == "CS")
    2188             :                 {
    2189        1374 :                     aVec.push_back( "sr-Latn-YU");
    2190        1374 :                     aVec.push_back( "sh-CS");
    2191        1374 :                     aVec.push_back( "sh-YU");
    2192             :                 }
    2193             :                 else
    2194        5012 :                     aVec.push_back( "sh-" + aCountry);
    2195             :             }
    2196       19187 :             else if (aLanguage == "pi" && aScript == "Latn")
    2197           0 :                 aVec.push_back( "pli");     // a special case for Pali dictionary, see fdo#41599
    2198       19187 :             else if (aLanguage == "krm" && aScript == "Latn" && (aCountry == "TR" || aCountry == "SY"))
    2199           0 :                 aVec.push_back( "ku-" + aCountry);
    2200             :         }
    2201       37905 :         if (!aVariants.isEmpty() && !bHaveLanguageScriptVariant)
    2202             :         {
    2203           0 :             aTmp = aLanguage + "-" + aScript + "-" + aVariants;
    2204           0 :             if (aTmp != maBcp47)
    2205           0 :                 aVec.push_back( aTmp);
    2206             :         }
    2207       37905 :         aTmp = aLanguage + "-" + aScript;
    2208       37905 :         if (aTmp != maBcp47)
    2209       25573 :             aVec.push_back( aTmp);
    2210             : 
    2211             :         // 'sh' actually denoted a script, so have it here instead of appended
    2212             :         // at the end as language-only.
    2213       37905 :         if (aLanguage == "sr" && aScript == "Latn")
    2214        7754 :             aVec.push_back( "sh");
    2215       30151 :         else if (aLanguage == "ku" && aScript == "Arab")
    2216           0 :             aVec.push_back( "ckb");
    2217             :         // 'ku' only denoted Latin script
    2218       30151 :         else if (aLanguage == "krm" && aScript == "Latn" && aCountry.isEmpty())
    2219           0 :             aVec.push_back( "ku");
    2220             :     }
    2221       40190 :     bool bHaveLanguageVariant = false;
    2222       40190 :     if (!aCountry.isEmpty())
    2223             :     {
    2224       27856 :         if (!aVariants.isEmpty())
    2225             :         {
    2226        2283 :             aTmp = aLanguage + "-" + aCountry + "-" + aVariants;
    2227        2283 :             if (aTmp != maBcp47)
    2228           0 :                 aVec.push_back( aTmp);
    2229        2283 :             if (maBcp47 == "ca-ES-valencia")
    2230        1826 :                 aVec.push_back( "ca-XV");
    2231             :             // Language with variant but without country before language
    2232             :             // without variant but with country.
    2233             :             // But only if variant is not from a grandfathered tag that
    2234             :             // wouldn't match the rules, i.e. "de-1901" is fine but "en-oed" is
    2235             :             // not.
    2236        4566 :             if (aVariants.getLength() >= 5 ||
    2237         457 :                     (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
    2238             :             {
    2239        1826 :                 aTmp = aLanguage + "-" + aVariants;
    2240        1826 :                 if (aTmp != maBcp47)
    2241        1826 :                     aVec.push_back( aTmp);
    2242        1826 :                 bHaveLanguageVariant = true;
    2243             :             }
    2244             :         }
    2245       27856 :         aTmp = aLanguage + "-" + aCountry;
    2246       27856 :         if (aTmp != maBcp47)
    2247       27856 :             aVec.push_back( aTmp);
    2248             :     }
    2249       40190 :     if (!aVariants.isEmpty() && !bHaveLanguageVariant)
    2250             :     {
    2251             :         // Only if variant is not from a grandfathered tag that wouldn't match
    2252             :         // the rules, i.e. "de-1901" is fine but "en-oed" is not.
    2253         919 :         if (aVariants.getLength() >= 5 ||
    2254         459 :                 (aVariants.getLength() == 4 && '0' <= aVariants[0] && aVariants[0] <= '9'))
    2255             :         {
    2256           2 :             aTmp = aLanguage + "-" + aVariants;
    2257           2 :             if (aTmp != maBcp47)
    2258           0 :                 aVec.push_back( aTmp);
    2259             :         }
    2260             :     }
    2261             : 
    2262             :     // Insert legacy fallbacks with country before language-only, but only
    2263             :     // default script, script was handled already above.
    2264       40190 :     if (!aCountry.isEmpty())
    2265             :     {
    2266       27856 :         if (aLanguage == "sr" && aCountry == "CS")
    2267        1374 :             aVec.push_back( "sr-YU");
    2268             :     }
    2269             : 
    2270             :     // Original language-only.
    2271       40190 :     if (aLanguage != maBcp47)
    2272       40190 :         aVec.push_back( aLanguage);
    2273             : 
    2274       40190 :     return aVec;
    2275             : }
    2276             : 
    2277             : 
    2278           0 : bool LanguageTag::equals( const LanguageTag & rLanguageTag, bool bResolveSystem ) const
    2279             : {
    2280             :     // If SYSTEM is not to be resolved or either both are SYSTEM or none, we
    2281             :     // can use the operator==() optimization.
    2282           0 :     if (!bResolveSystem || isSystemLocale() == rLanguageTag.isSystemLocale())
    2283           0 :         return operator==( rLanguageTag);
    2284             : 
    2285             :     // Compare full language tag strings.
    2286           0 :     return getBcp47( bResolveSystem) == rLanguageTag.getBcp47( bResolveSystem);
    2287             : }
    2288             : 
    2289             : 
    2290     1279441 : bool LanguageTag::operator==( const LanguageTag & rLanguageTag ) const
    2291             : {
    2292     1279441 :     if (isSystemLocale() && rLanguageTag.isSystemLocale())
    2293      202940 :         return true;    // both SYSTEM
    2294             : 
    2295             :     // No need to convert to BCP47 if both Lang-IDs are available.
    2296     1076501 :     if (mbInitializedLangID && rLanguageTag.mbInitializedLangID)
    2297             :     {
    2298             :         // Equal if same ID and no SYSTEM is involved or both are SYSTEM.
    2299     1042614 :         return mnLangID == rLanguageTag.mnLangID && isSystemLocale() == rLanguageTag.isSystemLocale();
    2300             :     }
    2301             : 
    2302             :     // Compare full language tag strings but SYSTEM unresolved.
    2303       33887 :     return getBcp47( false) == rLanguageTag.getBcp47( false);
    2304             : }
    2305             : 
    2306             : 
    2307     1279441 : bool LanguageTag::operator!=( const LanguageTag & rLanguageTag ) const
    2308             : {
    2309     1279441 :     return !operator==( rLanguageTag);
    2310             : }
    2311             : 
    2312             : 
    2313          99 : bool LanguageTag::operator<( const LanguageTag & rLanguageTag ) const
    2314             : {
    2315          99 :     return getBcp47( false).compareToIgnoreAsciiCase( rLanguageTag.getBcp47( false)) < 0;
    2316             : }
    2317             : 
    2318             : 
    2319             : // static
    2320      465163 : LanguageTagImpl::Extraction LanguageTagImpl::simpleExtract( const OUString& rBcp47,
    2321             :         OUString& rLanguage, OUString& rScript, OUString& rCountry, OUString& rVariants )
    2322             : {
    2323      465163 :     Extraction eRet = EXTRACTED_NONE;
    2324      465163 :     const sal_Int32 nLen = rBcp47.getLength();
    2325      465163 :     const sal_Int32 nHyph1 = rBcp47.indexOf( '-');
    2326      465163 :     sal_Int32 nHyph2 = (nHyph1 < 0 ? -1 : rBcp47.indexOf( '-', nHyph1 + 1));
    2327      465163 :     sal_Int32 nHyph3 = (nHyph2 < 0 ? -1 : rBcp47.indexOf( '-', nHyph2 + 1));
    2328      465163 :     sal_Int32 nHyph4 = (nHyph3 < 0 ? -1 : rBcp47.indexOf( '-', nHyph3 + 1));
    2329      465163 :     if (nLen == 1 && rBcp47[0] == '*')              // * the dreaded jolly joker
    2330             :     {
    2331             :         // It's f*d up but we need to recognize this.
    2332           6 :         eRet = EXTRACTED_X_JOKER;
    2333             :     }
    2334      465157 :     else if (nHyph1 == 1 && rBcp47[0] == 'x')       // x-... privateuse
    2335             :     {
    2336             :         // x-... privateuse tags MUST be known to us by definition.
    2337           7 :         eRet = EXTRACTED_X;
    2338             :     }
    2339      465150 :     else if (nLen == 2 || nLen == 3)                // ll or lll
    2340             :     {
    2341       69542 :         if (nHyph1 < 0)
    2342             :         {
    2343       34771 :             rLanguage = rBcp47.toAsciiLowerCase();
    2344       34771 :             rScript = rCountry = rVariants = OUString();
    2345       34771 :             eRet = EXTRACTED_LSC;
    2346             :         }
    2347             :     }
    2348      430379 :     else if (  (nHyph1 == 2 && nLen == 5)           // ll-CC
    2349      173733 :             || (nHyph1 == 3 && nLen == 6))          // lll-CC
    2350             :     {
    2351      744972 :         if (nHyph2 < 0)
    2352             :         {
    2353      372486 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2354      372486 :             rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
    2355      372486 :             rScript = rVariants = OUString();
    2356      372486 :             eRet = EXTRACTED_LSC;
    2357             :         }
    2358             :     }
    2359       57893 :     else if (  (nHyph1 == 2 && nLen ==  7)          // ll-Ssss or ll-vvvv
    2360       39759 :             || (nHyph1 == 3 && nLen ==  8))         // lll-Ssss or lll-vvvv
    2361             :     {
    2362       21762 :         if (nHyph2 < 0)
    2363             :         {
    2364       21762 :             sal_Unicode c = rBcp47[nHyph1+1];
    2365       21762 :             if ('0' <= c && c <= '9')
    2366             :             {
    2367             :                 // (DIGIT 3ALNUM) vvvv variant instead of Ssss script
    2368           1 :                 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2369           1 :                 rScript   = rCountry = OUString();
    2370           1 :                 rVariants = rBcp47.copy( nHyph1 + 1);
    2371           1 :                 eRet = EXTRACTED_LV;
    2372             :             }
    2373             :             else
    2374             :             {
    2375       21761 :                 rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2376       43522 :                 rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() +
    2377       65283 :                             rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2378       21761 :                 rCountry  = rVariants = OUString();
    2379       21761 :                 eRet = EXTRACTED_LSC;
    2380             :             }
    2381       21762 :         }
    2382             :     }
    2383       36131 :     else if (  (nHyph1 == 2 && nHyph2 == 7 && nLen == 10)   // ll-Ssss-CC
    2384       12309 :             || (nHyph1 == 3 && nHyph2 == 8 && nLen == 11))  // lll-Ssss-CC
    2385             :     {
    2386       68612 :         if (nHyph3 < 0)
    2387             :         {
    2388       34306 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2389       34306 :             rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2390       34306 :             rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
    2391       34306 :             rVariants = OUString();
    2392       34306 :             eRet = EXTRACTED_LSC;
    2393             :         }
    2394             :     }
    2395        1825 :     else if (  (nHyph1 == 2 && nHyph2 == 7 && nHyph3 == 10 && nLen >= 15)   // ll-Ssss-CC-vvvv[vvvv][-...]
    2396        1825 :             || (nHyph1 == 3 && nHyph2 == 8 && nHyph3 == 11 && nLen >= 16))  // lll-Ssss-CC-vvvv[vvvv][-...]
    2397             :     {
    2398           0 :         if (nHyph4 < 0)
    2399           0 :             nHyph4 = rBcp47.getLength();
    2400           0 :         if (nHyph4 - nHyph3 > 4 && nHyph4 - nHyph3 <= 9)
    2401             :         {
    2402           0 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2403           0 :             rScript   = rBcp47.copy( nHyph1 + 1, 1).toAsciiUpperCase() + rBcp47.copy( nHyph1 + 2, 3).toAsciiLowerCase();
    2404           0 :             rCountry  = rBcp47.copy( nHyph2 + 1, 2).toAsciiUpperCase();
    2405           0 :             rVariants = rBcp47.copy( nHyph3 + 1);
    2406           0 :             eRet = EXTRACTED_LV;
    2407             :         }
    2408             :     }
    2409        1825 :     else if (  (nHyph1 == 2 && nHyph2 == 5 && nLen >= 10)   // ll-CC-vvvv[vvvv][-...]
    2410         913 :             || (nHyph1 == 3 && nHyph2 == 6 && nLen >= 11))  // lll-CC-vvvv[vvvv][-...]
    2411             :     {
    2412         912 :         if (nHyph3 < 0)
    2413         912 :             nHyph3 = rBcp47.getLength();
    2414        1824 :         if (nHyph3 - nHyph2 > 4 && nHyph3 - nHyph2 <= 9)
    2415             :         {
    2416         912 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2417         912 :             rScript   = OUString();
    2418         912 :             rCountry  = rBcp47.copy( nHyph1 + 1, 2).toAsciiUpperCase();
    2419         912 :             rVariants = rBcp47.copy( nHyph2 + 1);
    2420         912 :             eRet = EXTRACTED_LV;
    2421             :         }
    2422             :     }
    2423         913 :     else if (  (nHyph1 == 2 && nLen >= 8)                   // ll-vvvvv[vvv][-...]
    2424           3 :             || (nHyph1 == 3 && nLen >= 9))                  // lll-vvvvv[vvv][-...]
    2425             :     {
    2426         910 :         if (nHyph2 < 0)
    2427           3 :             nHyph2 = rBcp47.getLength();
    2428         910 :         if (nHyph2 - nHyph1 > 5 && nHyph2 - nHyph1 <= 9)
    2429             :         {
    2430           2 :             rLanguage = rBcp47.copy( 0, nHyph1).toAsciiLowerCase();
    2431           2 :             rScript   = rCountry = OUString();
    2432           2 :             rVariants = rBcp47.copy( nHyph1 + 1);
    2433           2 :             eRet = EXTRACTED_LV;
    2434             :         }
    2435             :         else
    2436             :         {
    2437             :             // Known and handled grandfathered; ugly but effective ...
    2438             :             // Note that nLen must have matched above.
    2439             :             // Strictly not a variant, but so far we treat it as such.
    2440         908 :             if (rBcp47.equalsIgnoreAsciiCase( "en-GB-oed"))
    2441             :             {
    2442         907 :                 rLanguage = "en";
    2443         907 :                 rScript   = OUString();
    2444         907 :                 rCountry  = "GB";
    2445         907 :                 rVariants = "oed";
    2446         907 :                 eRet = EXTRACTED_LV;
    2447             :             }
    2448             :         }
    2449             :     }
    2450      465163 :     if (eRet == EXTRACTED_NONE)
    2451             :     {
    2452             :         SAL_INFO( "i18nlangtag", "LanguageTagImpl::simpleExtract: did not extract '" << rBcp47 << "'");
    2453           4 :         rLanguage = rScript = rCountry = rVariants = OUString();
    2454             :     }
    2455      465163 :     return eRet;
    2456             : }
    2457             : 
    2458             : 
    2459             : // static
    2460       28290 : ::std::vector< OUString >::const_iterator LanguageTag::getFallback(
    2461             :         const ::std::vector< OUString > & rList, const OUString & rReference )
    2462             : {
    2463       28290 :     if (rList.empty())
    2464        2640 :         return rList.end();
    2465             : 
    2466       25650 :     ::std::vector< OUString >::const_iterator it;
    2467             : 
    2468             :     // Try the simple case first without constructing fallbacks.
    2469       36338 :     for (it = rList.begin(); it != rList.end(); ++it)
    2470             :     {
    2471       25650 :         if (*it == rReference)
    2472       14962 :             return it;  // exact match
    2473             :     }
    2474             : 
    2475       10688 :     ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
    2476       10688 :     if (rReference != "en-US")
    2477             :     {
    2478           0 :         aFallbacks.push_back( "en-US");
    2479           0 :         if (rReference != "en")
    2480           0 :             aFallbacks.push_back( "en");
    2481             :     }
    2482       10688 :     if (rReference != "x-default")
    2483       10688 :         aFallbacks.push_back( "x-default");
    2484       10688 :     if (rReference != "x-no-translate")
    2485       10688 :         aFallbacks.push_back( "x-no-translate");
    2486             :     /* TODO: the original comphelper::Locale::getFallback() code had
    2487             :      * "x-notranslate" instead of "x-no-translate", but all .xcu files use
    2488             :      * "x-no-translate" and "x-notranslate" apparently was never used anywhere.
    2489             :      * Did that ever work? Was it supposed to work at all like this? */
    2490             : 
    2491       41730 :     for (::std::vector< OUString >::const_iterator fb = aFallbacks.begin(); fb != aFallbacks.end(); ++fb)
    2492             :     {
    2493       62595 :         for (it = rList.begin(); it != rList.end(); ++it)
    2494             :         {
    2495       31553 :             if (*it == *fb)
    2496         511 :                 return it;  // fallback found
    2497             :         }
    2498             :     }
    2499             : 
    2500             :     // Did not find anything so return something of the list, the first value
    2501             :     // will do as well as any other as none did match any of the possible
    2502             :     // fallbacks.
    2503       10177 :     return rList.begin();
    2504             : }
    2505             : 
    2506             : 
    2507             : // static
    2508           0 : ::std::vector< com::sun::star::lang::Locale >::const_iterator LanguageTag::getMatchingFallback(
    2509             :         const ::std::vector< com::sun::star::lang::Locale > & rList,
    2510             :         const com::sun::star::lang::Locale & rReference )
    2511             : {
    2512           0 :     if (rList.empty())
    2513           0 :         return rList.end();
    2514             : 
    2515           0 :     ::std::vector< lang::Locale >::const_iterator it;
    2516             : 
    2517             :     // Try the simple case first without constructing fallbacks.
    2518           0 :     for (it = rList.begin(); it != rList.end(); ++it)
    2519             :     {
    2520           0 :         if (    (*it).Language == rReference.Language &&
    2521           0 :                 (*it).Country  == rReference.Country  &&
    2522           0 :                 (*it).Variant  == rReference.Variant)
    2523           0 :             return it;  // exact match
    2524             :     }
    2525             : 
    2526             :     // Now for each reference fallback test the fallbacks of the list in order.
    2527           0 :     ::std::vector< OUString > aFallbacks( LanguageTag( rReference).getFallbackStrings( false));
    2528           0 :     ::std::vector< ::std::vector< OUString > > aListFallbacks( rList.size());
    2529           0 :     size_t i = 0;
    2530           0 :     for (it = rList.begin(); it != rList.end(); ++it, ++i)
    2531             :     {
    2532           0 :         ::std::vector< OUString > aTmp( LanguageTag( *it).getFallbackStrings( true));
    2533           0 :         aListFallbacks[i] = aTmp;
    2534           0 :     }
    2535           0 :     for (::std::vector< OUString >::const_iterator rfb( aFallbacks.begin()); rfb != aFallbacks.end(); ++rfb)
    2536             :     {
    2537           0 :         for (::std::vector< ::std::vector< OUString > >::const_iterator lfb( aListFallbacks.begin());
    2538           0 :                 lfb != aListFallbacks.end(); ++lfb)
    2539             :         {
    2540           0 :             for (::std::vector< OUString >::const_iterator fb( (*lfb).begin()); fb != (*lfb).end(); ++fb)
    2541             :             {
    2542           0 :                 if (*rfb == *fb)
    2543           0 :                     return rList.begin() + (lfb - aListFallbacks.begin());
    2544             :             }
    2545             :         }
    2546             :     }
    2547             : 
    2548             :     // No match found.
    2549           0 :     return rList.end();
    2550             : }
    2551             : 
    2552             : 
    2553         228 : static bool lcl_isSystem( LanguageType nLangID )
    2554             : {
    2555         228 :     if (nLangID == LANGUAGE_SYSTEM)
    2556         195 :         return true;
    2557             :     // There are some special values that simplify to SYSTEM,
    2558             :     // getRealLanguage() catches and resolves them.
    2559          33 :     LanguageType nNewLangID = MsLangId::getRealLanguage( nLangID);
    2560          33 :     if (nNewLangID != nLangID)
    2561           0 :         return true;
    2562          33 :     return false;
    2563             : }
    2564             : 
    2565             : 
    2566             : // static
    2567      298075 : com::sun::star::lang::Locale LanguageTag::convertToLocale( LanguageType nLangID, bool bResolveSystem )
    2568             : {
    2569      298075 :     if (!bResolveSystem && lcl_isSystem( nLangID))
    2570         195 :         return lang::Locale();
    2571             : 
    2572      297880 :     return LanguageTag( nLangID).getLocale( bResolveSystem);
    2573             : }
    2574             : 
    2575             : 
    2576             : // static
    2577      522973 : LanguageType LanguageTag::convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
    2578             : {
    2579      522973 :     if (rLocale.Language.isEmpty() && !bResolveSystem)
    2580       41166 :         return LANGUAGE_SYSTEM;
    2581             : 
    2582      481807 :     return LanguageTag( rLocale).getLanguageType( bResolveSystem);
    2583             : }
    2584             : 
    2585             : 
    2586             : // static
    2587      775730 : OUString LanguageTagImpl::convertToBcp47( const com::sun::star::lang::Locale& rLocale )
    2588             : {
    2589      775730 :     OUString aBcp47;
    2590      775730 :     if (rLocale.Language.isEmpty())
    2591             :     {
    2592             :         // aBcp47 stays empty
    2593             :     }
    2594      775730 :     else if (rLocale.Language == I18NLANGTAG_QLT)
    2595             :     {
    2596       47563 :         aBcp47 = rLocale.Variant;
    2597             :     }
    2598             :     else
    2599             :     {
    2600             :         /* XXX NOTE: most legacy code never evaluated the Variant field, so for
    2601             :          * now just concatenate language and country. In case we stumbled over
    2602             :          * variant aware code we'd have to take care of that. */
    2603      728167 :         if (rLocale.Country.isEmpty())
    2604      116745 :             aBcp47 = rLocale.Language;
    2605             :         else
    2606             :         {
    2607      611422 :             aBcp47 = rLocale.Language + "-" + rLocale.Country;
    2608             :         }
    2609             :     }
    2610      775730 :     return aBcp47;
    2611             : }
    2612             : 
    2613             : 
    2614             : // static
    2615        4092 : OUString LanguageTag::convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem )
    2616             : {
    2617        4092 :     OUString aBcp47;
    2618        4092 :     if (rLocale.Language.isEmpty())
    2619             :     {
    2620           5 :         if (bResolveSystem)
    2621           1 :             aBcp47 = LanguageTag::convertToBcp47( LANGUAGE_SYSTEM, true);
    2622             :         // else aBcp47 stays empty
    2623             :     }
    2624             :     else
    2625             :     {
    2626        4087 :         aBcp47 = LanguageTagImpl::convertToBcp47( rLocale);
    2627             :     }
    2628        4092 :     return aBcp47;
    2629             : }
    2630             : 
    2631             : 
    2632             : // static
    2633         273 : OUString LanguageTag::convertToBcp47( LanguageType nLangID, bool bResolveSystem )
    2634             : {
    2635             :     // Catch this first so we don't need the rest.
    2636         273 :     if (!bResolveSystem && lcl_isSystem( nLangID))
    2637           0 :         return OUString();
    2638             : 
    2639         273 :     lang::Locale aLocale( LanguageTag::convertToLocale( nLangID, bResolveSystem));
    2640             :     // If system for some reason (should not happen.. haha) could not be
    2641             :     // resolved DO NOT CALL LanguageTag::convertToBcp47(Locale) because that
    2642             :     // would recurse into this method here!
    2643         273 :     if (aLocale.Language.isEmpty() && bResolveSystem)
    2644           0 :         return OUString();      // bad luck, bail out
    2645         273 :     return LanguageTagImpl::convertToBcp47( aLocale);
    2646             : }
    2647             : 
    2648             : 
    2649             : // static
    2650       34138 : com::sun::star::lang::Locale LanguageTag::convertToLocale( const OUString& rBcp47, bool bResolveSystem )
    2651             : {
    2652       34138 :     if (rBcp47.isEmpty() && !bResolveSystem)
    2653           0 :         return lang::Locale();
    2654             : 
    2655       34138 :     return LanguageTag( rBcp47).getLocale( bResolveSystem);
    2656             : }
    2657             : 
    2658             : 
    2659             : // static
    2660         885 : LanguageType LanguageTag::convertToLanguageType( const OUString& rBcp47, bool bResolveSystem )
    2661             : {
    2662         885 :     if (rBcp47.isEmpty() && !bResolveSystem)
    2663           0 :         return LANGUAGE_SYSTEM;
    2664             : 
    2665         885 :     return LanguageTag( rBcp47).getLanguageType( bResolveSystem);
    2666             : }
    2667             : 
    2668             : 
    2669             : // static
    2670        1241 : LanguageType LanguageTag::convertToLanguageTypeWithFallback( const OUString& rBcp47 )
    2671             : {
    2672        1241 :     return LanguageTag( rBcp47).makeFallback().getLanguageType( true);
    2673             : }
    2674             : 
    2675             : 
    2676             : // static
    2677           0 : com::sun::star::lang::Locale LanguageTag::convertToLocaleWithFallback( const OUString& rBcp47 )
    2678             : {
    2679           0 :     return LanguageTag( rBcp47).makeFallback().getLocale( true);
    2680             : }
    2681             : 
    2682             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10