LCOV - code coverage report
Current view: top level - include/i18nlangtag - languagetag.hxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 1 1 100.0 %
Date: 2015-06-13 12:38:46 Functions: 1 1 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  */
       9             : 
      10             : #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
      11             : #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
      12             : 
      13             : #include <sal/config.h>
      14             : #include <rtl/ustring.hxx>
      15             : #include <com/sun/star/lang/Locale.hpp>
      16             : #include <i18nlangtag/i18nlangtagdllapi.h>
      17             : #include <i18nlangtag/lang.h>
      18             : 
      19             : #include <memory>
      20             : #include <vector>
      21             : 
      22             : typedef struct _rtl_Locale rtl_Locale;  // as in rtl/locale.h
      23             : 
      24             : 
      25             : /** The ISO 639-2 code reserved for local use used to indicate that a
      26             :     com::sun::star::lang::Locale contains a BCP 47 string in its Variant field. The
      27             :     Locale's Language field then will contain this language code.
      28             : 
      29             :     @see LanguageTag::getLocale()
      30             : 
      31             :     Avoid use, only needed internally or if conversion from Locale to
      32             :     LanguageTag is not wanted, i.e. during ODF import. To check whether a
      33             :     LanguageTag contains a plain language/country combination or a more
      34             :     detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead.
      35             :  */
      36             : #define I18NLANGTAG_QLT "qlt"
      37             : 
      38             : 
      39             : class LanguageTagImpl;
      40             : 
      41             : 
      42             : /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
      43             :     conversions in between.
      44             : 
      45             :     Note that member variables are mutable and may change their values even in
      46             :     const methods. Getter methods return either the original value or matching
      47             :     converted values.
      48             : 
      49             :     For standalone conversions if no LanguageTag instance is at hand, static
      50             :     convertTo...() methods exist.
      51             :  */
      52             : class I18NLANGTAG_DLLPUBLIC LanguageTag
      53             : {
      54             :     friend class LanguageTagImpl;
      55             : 
      56             : public:
      57             : 
      58             :     /** Init LanguageTag with existing BCP 47 language tag string.
      59             : 
      60             :         @param bCanonicalize
      61             :             If TRUE, canonicalize tag and reparse, the resulting tag string may
      62             :             be different.
      63             :             If FALSE, the tag is simply stored and can be retrieved with
      64             :             getBcp47().
      65             : 
      66             :         Note that conversions to ISO codes, locales or LanguageType or
      67             :         obtaining language or script will canonicalize the tag string anyway,
      68             :         so specifying bCanonicalize=false is not a guarantee that the tag will
      69             :         stay identical to what was passed.
      70             :      */
      71             :     explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
      72             : 
      73             :     /** Init LanguageTag with Locale. */
      74             :     explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );
      75             : 
      76             :     /** Init LanguageTag with LanguageType MS-LangID. */
      77             :     explicit LanguageTag( LanguageType nLanguage );
      78             : 
      79             :     /** Init LanguageTag with either BCP 47 language tag (precedence if not
      80             :         empty), or a combination of language, script and country.
      81             : 
      82             :         This is a convenience ctor to be used in ODF import where these are
      83             :         distinct attributes.
      84             :      */
      85             :     explicit LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
      86             :                           const OUString& rScript, const OUString& rCountry );
      87             : 
      88             :     /** Init LanguageTag with rtl_Locale.
      89             : 
      90             :         This is a convenience ctor.
      91             :      */
      92             :     explicit LanguageTag( const rtl_Locale & rLocale );
      93             : 
      94             :     LanguageTag( const LanguageTag & rLanguageTag );
      95             :     ~LanguageTag();
      96             :     LanguageTag& operator=( const LanguageTag & rLanguageTag );
      97             : 
      98             :     /** Obtain BCP 47 language tag.
      99             : 
     100             :         @param bResolveSystem
     101             :                If TRUE, resolve an empty language tag denoting the system
     102             :                locale to the real locale used.
     103             :                If FALSE, return an empty OUString for such a tag.
     104             :      */
     105             :     const OUString &                getBcp47( bool bResolveSystem = true ) const;
     106             : 
     107             :     /** Obtain language tag as Locale.
     108             : 
     109             :         As a convention, language tags that can not be expressed as "pure"
     110             :         com::sun::star::lang::Locale content using Language and Country fields
     111             :         store "qlt" (ISO 639 reserved for local use) in the Language field and
     112             :         the entire BCP 47 language tag in the Variant field. The Country field
     113             :         contains the corresponding ISO 3166 country code _if_ there is one, or
     114             :         otherwise is empty.
     115             : 
     116             :         @param bResolveSystem
     117             :                If TRUE, resolve an empty language tag denoting the system
     118             :                locale to the real locale used.
     119             :                If FALSE, return an empty Locale for such a tag.
     120             :      */
     121             :     const com::sun::star::lang::Locale &    getLocale( bool bResolveSystem = true ) const;
     122             : 
     123             :     /** Obtain mapping to MS-LangID.
     124             : 
     125             :         @param bResolveSystem
     126             :                If TRUE, resolve an empty language tag denoting the system
     127             :                locale to the real locale used.
     128             :                If FALSE, return LANGUAGE_SYSTEM for such a tag.
     129             :      */
     130             :     LanguageType                    getLanguageType( bool bResolveSystem = true ) const;
     131             : 
     132             :     /** Obtain ISO strings for language, script and country.
     133             : 
     134             :         This is a convenience method for ODF export places only. Avoid use in
     135             :         other code.
     136             : 
     137             :         ATTENTION! May return empty strings if the language tag is not
     138             :         expressible in valid ISO codes!
     139             : 
     140             :         @see isIsoODF()
     141             : 
     142             :         Always resolves an empty tag to the system locale.
     143             :      */
     144             :     void                            getIsoLanguageScriptCountry( OUString& rLanguage,
     145             :                                                                  OUString& rScript, OUString& rCountry ) const;
     146             : 
     147             :     /** Get ISO 639 language code, or BCP 47 language.
     148             : 
     149             :         Always resolves an empty tag to the system locale.
     150             :      */
     151             :     OUString                        getLanguage() const;
     152             : 
     153             :     /** Get ISO 15924 script code, if not the default script according to
     154             :         BCP 47. For default script an empty string is returned.
     155             : 
     156             :         @see hasScript()
     157             : 
     158             :         Always resolves an empty tag to the system locale.
     159             :      */
     160             :     OUString                        getScript() const;
     161             : 
     162             :     /** Get combined language and script code, separated by '-' if the
     163             :         script is non-default; for the default script only the language.
     164             : 
     165             :         @see hasScript()
     166             : 
     167             :         Always resolves an empty tag to the system locale.
     168             :      */
     169             :     OUString                        getLanguageAndScript() const;
     170             : 
     171             :     /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
     172             :         region not expressible as a 2-character country code.
     173             : 
     174             :         Always resolves an empty tag to the system locale.
     175             :      */
     176             :     OUString                        getCountry() const;
     177             : 
     178             :     /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
     179             : 
     180             :         If there are multiple variant subtags they are separated by '-'.
     181             : 
     182             :         This is NOT related to Locale.Variant!
     183             : 
     184             :         Always resolves an empty tag to the system locale.
     185             :      */
     186             :     OUString                        getVariants() const;
     187             : 
     188             :     /** Get a GLIBC locale string.
     189             : 
     190             :         Always resolves an empty tag to the system locale.
     191             : 
     192             :         @param  rEncoding
     193             :                 An encoding to be appended to language_country, for example
     194             :                 ".UTF-8" including the dot.
     195             : 
     196             :         @return The resulting GLIBC locale string if it could be constructed,
     197             :                 if not an empty string is returned.
     198             :      */
     199             :     OUString                        getGlibcLocaleString( const OUString & rEncoding ) const;
     200             : 
     201             :     /** If language tag has a non-default script specified.
     202             :      */
     203             :     bool                            hasScript() const;
     204             : 
     205             :     /** If language tag is a locale that can be expressed using only ISO 639
     206             :         language codes and ISO 3166 country codes, thus is convertible to a
     207             :         conforming Locale struct without using extension mechanisms.
     208             : 
     209             :         Note that an empty language tag or empty Locale::Language field or
     210             :         LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
     211             :         some context, but here is not. If you want that ask for
     212             :         aTag.isSystemLocale() || aTag.isIsoLocale()
     213             : 
     214             :         Always resolves an empty tag to the system locale.
     215             :      */
     216             :     bool                            isIsoLocale() const;
     217             : 
     218             :     /** If language tag is a locale that can be expressed using only ISO 639
     219             :         language codes and ISO 15924 script codes and ISO 3166 country codes,
     220             :         thus can be stored in an ODF document using only fo:language, fo:script
     221             :         and fo:country attributes. If this is FALSE, the locale must be stored
     222             :         as a <*:rfc-language-tag> element.
     223             : 
     224             :         Always resolves an empty tag to the system locale.
     225             :      */
     226             :     bool                            isIsoODF() const;
     227             : 
     228             :     /** If this is a valid BCP 47 language tag.
     229             : 
     230             :         Always resolves an empty tag to the system locale.
     231             : 
     232             :         @see        static bool isValidBcp47(const OUString&)
     233             :      */
     234             :     bool                            isValidBcp47() const;
     235             : 
     236             :     /** If this tag was constructed as an empty tag denoting the system locale.
     237             :       */
     238    99445280 :     bool                            isSystemLocale() const { return mbSystemLocale;}
     239             : 
     240             : 
     241             :     /** Reset with existing BCP 47 language tag string. See ctor. */
     242             :     LanguageTag &                   reset( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
     243             : 
     244             :     /** Reset with Locale. */
     245             :     LanguageTag &                   reset( const com::sun::star::lang::Locale & rLocale );
     246             : 
     247             :     /** Reset with LanguageType MS-LangID. */
     248             :     LanguageTag &                   reset( LanguageType nLanguage );
     249             : 
     250             : 
     251             :     /** Fall back to a known locale.
     252             : 
     253             :         If the current tag does not represent a known (by us) locale, fall back
     254             :         to the most likely known locale.
     255             :         If the current tag is known, no change occurs.
     256             :      */
     257             :     LanguageTag &                   makeFallback();
     258             : 
     259             :     /** Return a vector of fall-back strings.
     260             : 
     261             :         In order:
     262             :         full BCP 47 tag, same as getBcp47()
     263             :         lll-Ssss-CC
     264             :         lll-Ssss
     265             :         lll-CC
     266             :         lll
     267             : 
     268             :         If the tag includes variants the order is:
     269             :         full BCP 47 tag, same as getBcp47()
     270             :         lll-Ssss-CC-vvvvvvvv
     271             :         lll-Ssss-vvvvvvvv
     272             :         lll-Ssss-CC
     273             :         lll-Ssss
     274             :         lll-CC-vvvvvvvv
     275             :         lll-vvvvvvvv
     276             :         lll-CC
     277             :         lll
     278             : 
     279             :         Only strings that differ from a higher order are included, for example
     280             :         if there is no script the elements will be bcp47, lll-CC, lll; if the
     281             :         bcp47 string is identical to lll-CC then only lll-CC, lll.
     282             : 
     283             :         Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
     284             :         alpha code. If the region can not be expressed as ISO 3166 then no -CC
     285             :         tags are included.
     286             : 
     287             :         @param  bIncludeFullBcp47
     288             :                 If TRUE, the full BCP 47 tag is included as first element.
     289             :                 If FALSE, the full tag is not included; used if the caller
     290             :                 obtains the fallbacks only if the full tag did not lead to a
     291             :                 match, so subsequent tries need not include it again.
     292             :      */
     293             :     ::std::vector< OUString >       getFallbackStrings( bool bIncludeFullBcp47 ) const;
     294             : 
     295             : 
     296             :     /** @short  Search for an equal or at least for a similar locale in a list
     297             :                 of possible ones.
     298             : 
     299             :         @descr  First search for a locale that is equal to the reference
     300             :                 locale. (means: same BCP47 string)
     301             : 
     302             :                 If the reference locale could not be located, check for
     303             :                 "similar" locales, in the same order as obtained by
     304             :                 getFallbackStrings().
     305             : 
     306             :                 If no similar locale could be located, we search for a locale
     307             :                 "en-US" inside the given locale list.
     308             : 
     309             :                 If "en-US" could not be located, we search for a locale "en"
     310             :                 inside the given list.
     311             : 
     312             :                 If no "same" nor any "similar" locale could be found, we try
     313             :                 "x-default" and "x-no-translate" explicitly. Sometimes
     314             :                 variables don't use real localization. For example, in case the
     315             :                 localized value is a fixed product name.
     316             : 
     317             :                 If no locale matched until then, we use any other locale that
     318             :                 exists inside the set of given ones, namely the first
     319             :                 encountered!
     320             : 
     321             :         @param  rList
     322             :                 the vector of possible locales as BCP47 strings.
     323             : 
     324             :         @param  rReference
     325             :                 the reference locale, BCP47 string.
     326             : 
     327             :         @return An iterator that points to the found element inside the given
     328             :                 locale list. If no matching locale could be found it points to
     329             :                 the beginning of the list.
     330             :      */
     331             :     static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList,
     332             :                                                                   const OUString & rReference );
     333             : 
     334             : 
     335             :     /** @short  Search for an equal or for a similar locale in a list
     336             :                 of possible ones where at least the language matches.
     337             : 
     338             :         @descr  First search for a locale that is equal to the reference
     339             :                 locale.
     340             : 
     341             :                 If the reference locale could not be located, check for
     342             :                 "similar" locales, in the same order as obtained by
     343             :                 getFallbackStrings().
     344             : 
     345             :                 If no locale matches, rList.end() is returned.
     346             : 
     347             :         @param  rList
     348             :                 the vector of possible locales.
     349             : 
     350             :         @param  rReference
     351             :                 the reference locale.
     352             : 
     353             :         @return An iterator that points to the found element inside the given
     354             :                 locale list. If no matching locale could be found it points to
     355             :                 the end of the list.
     356             :      */
     357             :     static ::std::vector< com::sun::star::lang::Locale >::const_iterator getMatchingFallback(
     358             :             const ::std::vector< com::sun::star::lang::Locale > & rList,
     359             :             const com::sun::star::lang::Locale & rReference );
     360             : 
     361             : 
     362             :     /** Test equality of two LanguageTag, possibly resolving system locale.
     363             : 
     364             :         @param bResolveSystem
     365             :                If TRUE, resolve empty language tags denoting the system
     366             :                locale to the real locale used before comparing.
     367             :                If FALSE, the behavior is identical to operator==(), system
     368             :                locales are not resolved first.
     369             :       */
     370             :     bool                            equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const;
     371             : 
     372             :     /** Test equality of two LanguageTag.
     373             : 
     374             :         Does NOT resolve system, i.e. if the system locale is en-US
     375             :         LanguageTag("")==LanguageTag("en-US") returns false! Use
     376             :         equals(...,true) instead if system locales shall be resolved.
     377             :      */
     378             :     bool    operator==( const LanguageTag & rLanguageTag ) const;
     379             : 
     380             :     /** Test inequality of two LanguageTag.
     381             : 
     382             :         Does NOT resolve system, i.e. if the system locale is en-US
     383             :         LanguageTag("")!=LanguageTag("en-US") returns true! Use
     384             :         !equals(...,true) instead if system locales shall be resolved.
     385             :      */
     386             :     bool    operator!=( const LanguageTag & rLanguageTag ) const;
     387             : 
     388             :     /** Test this LanguageTag less than that LanguageTag.
     389             : 
     390             :         For sorted containers. Does NOT resolve system.
     391             :      */
     392             :     bool    operator<( const LanguageTag & rLanguageTag ) const;
     393             : 
     394             :     /** Convert MS-LangID to Locale.
     395             : 
     396             :         @param bResolveSystem
     397             :                If TRUE, resolve an empty language tag denoting the system
     398             :                locale to the real locale used.
     399             :                If FALSE, return an empty Locale for such a tag.
     400             :      */
     401             :     static com::sun::star::lang::Locale convertToLocale( LanguageType nLangID, bool bResolveSystem = true );
     402             : 
     403             :     /** Convert Locale to MS-LangID.
     404             : 
     405             :         @param bResolveSystem
     406             :                If TRUE, resolve an empty language tag denoting the system
     407             :                locale to the real locale used.
     408             :                If FALSE, return LANGUAGE_SYSTEM for such a tag.
     409             :      */
     410             :     static LanguageType convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true );
     411             : 
     412             :     /** Convert MS-LangID to BCP 47 string.
     413             : 
     414             :         @param bResolveSystem
     415             :                If TRUE, resolve an empty language tag denoting the system
     416             :                locale to the real locale used.
     417             :                If FALSE, return an empty OUString for such a tag.
     418             :      */
     419             :     static OUString convertToBcp47( LanguageType nLangID, bool bResolveSystem = true );
     420             : 
     421             :     /** Convert Locale to BCP 47 string.
     422             : 
     423             :         @param bResolveSystem
     424             :                If TRUE, resolve an empty language tag denoting the system
     425             :                locale to the real locale used.
     426             :                If FALSE, return an empty OUString for such a tag.
     427             :      */
     428             :     static OUString convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true );
     429             : 
     430             :     /** Convert BCP 47 string to Locale, convenience method.
     431             : 
     432             :         NOTE: exists only for consistency with the other convertTo...()
     433             :         methods, internally uses a temporary LanguageTag instance for
     434             :         conversion so does not save anything compared to
     435             :         LanguageTag(rBcp47).getLocale(bResolveSystem).
     436             : 
     437             :         @param bResolveSystem
     438             :                If TRUE, resolve an empty language tag denoting the system
     439             :                locale to the real locale used.
     440             :                If FALSE, return an empty Locale for such a tag.
     441             :      */
     442             :     static com::sun::star::lang::Locale convertToLocale( const OUString& rBcp47, bool bResolveSystem = true );
     443             : 
     444             :     /** Convert BCP 47 string to MS-LangID, convenience method.
     445             : 
     446             :         NOTE: exists only for consistency with the other convertTo...()
     447             :         methods, internally uses a temporary LanguageTag instance for
     448             :         conversion so does not save anything compared to
     449             :         LanguageTag(rBcp47).getLanguageType(bResolveSystem).
     450             : 
     451             :         @param bResolveSystem
     452             :                If TRUE, resolve an empty language tag denoting the system
     453             :                locale to the real locale used.
     454             :                If FALSE, return LANGUAGE_SYSTEM for such a tag.
     455             :      */
     456             :     static LanguageType convertToLanguageType( const OUString& rBcp47, bool bResolveSystem = true );
     457             : 
     458             :     /** Convert BCP 47 string to MS-LangID with fallback, convenience method.
     459             : 
     460             :         NOTE: exists only for consistency with the other convertTo...()
     461             :         methods, internally uses a temporary LanguageTag instance for
     462             :         conversion so does not save anything compared to
     463             :         LanguageTag(rBcp47).makeFallback().getLanguageType().
     464             : 
     465             :         @see    makeFallback()
     466             : 
     467             :         Always resolves an empty tag to the system locale.
     468             :      */
     469             :     static LanguageType convertToLanguageTypeWithFallback( const OUString& rBcp47 );
     470             : 
     471             :     /** Convert BCP 47 string to Locale with fallback, convenience method.
     472             : 
     473             :         NOTE: exists only for consistency with the other convertTo...()
     474             :         methods, internally uses a temporary LanguageTag instance for
     475             :         conversion so does not save anything compared to
     476             :         LanguageTag(rBcp47).makeFallback().getLocale().
     477             : 
     478             :         @see    makeFallback()
     479             : 
     480             :         Always resolves an empty tag to the system locale.
     481             :      */
     482             :     static com::sun::star::lang::Locale convertToLocaleWithFallback( const OUString& rBcp47 );
     483             : 
     484             :     /** If rString represents a valid BCP 47 language tag.
     485             : 
     486             :         Never resolves an empty tag to the system locale, in fact an empty
     487             :         string is invalid here. Does not create an instance to be registered
     488             :         with a conversion to Locale or LanguageType.
     489             : 
     490             :         @param  o_pCanonicalized
     491             :                 If given and rString is a valid BCP 47 language tag, the
     492             :                 canonicalized form is assigned, which may differ from the
     493             :                 original string even if that was a valid tag. If rString is not
     494             :                 a valid tag, nothing is assigned.
     495             : 
     496             :         @param  bDisallowPrivate
     497             :                 If TRUE, valid tags according to BCP 47 but reserved for
     498             :                 private use, like 'x-...', are not allowed and FALSE is
     499             :                 returned in this case.
     500             :      */
     501             :     static bool         isValidBcp47( const OUString& rString, OUString* o_pCanonicalized = NULL,
     502             :                                       bool bDisallowPrivate = false );
     503             : 
     504             :     /** If nLang is a generated on-the-fly LangID */
     505             :     static bool         isOnTheFlyID( LanguageType nLang );
     506             : 
     507             :     /** @attention _ONLY_ to be called by the application's configuration! */
     508             :     static void setConfiguredSystemLanguage( LanguageType nLang );
     509             : 
     510             :     typedef std::shared_ptr< LanguageTagImpl > ImplPtr;
     511             : 
     512             : private:
     513             : 
     514             :     mutable com::sun::star::lang::Locale    maLocale;
     515             :     mutable OUString                        maBcp47;
     516             :     mutable LanguageType                    mnLangID;
     517             :     mutable ImplPtr                         mpImpl;
     518             :             bool                            mbSystemLocale      : 1;
     519             :     mutable bool                            mbInitializedBcp47  : 1;
     520             :     mutable bool                            mbInitializedLocale : 1;
     521             :     mutable bool                            mbInitializedLangID : 1;
     522             :             bool                            mbIsFallback        : 1;
     523             : 
     524             :     ImplPtr             getImpl() const;
     525             :     ImplPtr             registerImpl() const;
     526             :     void                syncFromImpl();
     527             :     void                syncVarsFromRawImpl() const;
     528             :     void                syncVarsFromImpl() const;
     529             : 
     530             :     void                convertLocaleToLang();
     531             :     void                convertBcp47ToLocale();
     532             :     void                convertBcp47ToLang();
     533             :     void                convertLangToLocale();
     534             : 
     535             :     void                convertFromRtlLocale();
     536             : 
     537             :     /** Canonicalize if not yet done and synchronize initialized conversions.
     538             : 
     539             :         @return whether BCP 47 language tag string was changed.
     540             :      */
     541             :     bool                synCanonicalize();
     542             : 
     543             :     void                resetVars();
     544             : 
     545             :     static bool         isIsoLanguage( const OUString& rLanguage );
     546             :     static bool         isIsoScript( const OUString& rScript );
     547             :     static bool         isIsoCountry( const OUString& rRegion );
     548             : 
     549             : };
     550             : 
     551             : #endif  // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
     552             : 
     553             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11