Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : */
9 :
10 : #ifndef INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
11 : #define INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
12 :
13 : #include <sal/config.h>
14 : #include <rtl/ustring.hxx>
15 : #include <com/sun/star/lang/Locale.hpp>
16 : #include <i18nlangtag/i18nlangtagdllapi.h>
17 : #include <i18nlangtag/lang.h>
18 :
19 : #include <memory>
20 : #include <vector>
21 :
22 : typedef struct _rtl_Locale rtl_Locale; // forward declaration, same as in rtl/locale.h
23 :
24 :
25 : /** The ISO 639-2 code reserved for local use used to indicate that a
26 : com::sun::star::lang::Locale contains a BCP 47 string in its Variant
27 : field. The Locale's Language field then will contain this language code.
28 :
29 : @see LanguageTag::getLocale()
30 :
31 : Avoid use, only needed internally or if conversion from Locale to
32 : LanguageTag is not wanted, i.e. during ODF import. To check whether a
33 : LanguageTag contains a plain language/country combination or a more
34 : detailed BCP 47 language tag use LanguageTag::isIsoLocale() instead.
35 : */
36 : #define I18NLANGTAG_QLT "qlt"
37 :
38 :
39 : class LanguageTagImpl; // forward declaration; LanguageTag refers to it through LanguageTag::ImplPtr
40 :
41 :
42 : /** Wrapper for liblangtag BCP 47 language tags, MS-LangIDs, locales and
43 : conversions in between.
44 :
45 : Note that member variables are mutable and may change their values even in
46 : const methods. Getter methods return either the original value or matching
47 : converted values.
48 :
49 : For standalone conversions if no LanguageTag instance is at hand, static
50 : convertTo...() methods exist.
51 : */
52 : class I18NLANGTAG_DLLPUBLIC LanguageTag
53 : {
54 : friend class LanguageTagImpl;
55 :
56 : public:
57 :
58 : /** Init LanguageTag with existing BCP 47 language tag string.
59 :
60 : @param bCanonicalize
61 : If TRUE, canonicalize tag and reparse, the resulting tag string may
62 : be different.
63 : If FALSE, the tag is simply stored and can be retrieved with
64 : getBcp47().
65 :
66 : Note that conversions to ISO codes, locales or LanguageType or
67 : obtaining language or script will canonicalize the tag string anyway,
68 : so specifying bCanonicalize=false is not a guarantee that the tag will
69 : stay identical to what was passed.
70 : */
71 : explicit LanguageTag( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
72 :
73 : /** Init LanguageTag with Locale. */
74 : explicit LanguageTag( const com::sun::star::lang::Locale & rLocale );
75 :
76 : /** Init LanguageTag with LanguageType MS-LangID. */
77 : explicit LanguageTag( LanguageType nLanguage );
78 :
79 : /** Init LanguageTag with either BCP 47 language tag (precedence if not
80 : empty), or a combination of language, script and country.
81 :
82 : This is a convenience ctor to be used in ODF import where these are
83 : distinct attributes.
84 : */
85 : explicit LanguageTag( const OUString& rBcp47, const OUString& rLanguage,
86 : const OUString& rScript, const OUString& rCountry );
87 :
88 : /** Init LanguageTag with rtl_Locale.
89 :
90 : This is a convenience ctor.
91 : */
92 : explicit LanguageTag( const rtl_Locale & rLocale );
93 :
94 : LanguageTag( const LanguageTag & rLanguageTag );
95 : ~LanguageTag();
96 : LanguageTag& operator=( const LanguageTag & rLanguageTag );
97 :
98 : /** Obtain BCP 47 language tag.
99 :
100 : @param bResolveSystem
101 : If TRUE, resolve an empty language tag denoting the system
102 : locale to the real locale used.
103 : If FALSE, return an empty OUString for such a tag.
104 : */
105 : const OUString & getBcp47( bool bResolveSystem = true ) const;
106 :
107 : /** Obtain language tag as Locale.
108 :
109 : As a convention, language tags that can not be expressed as "pure"
110 : com::sun::star::lang::Locale content using Language and Country fields
111 : store "qlt" (ISO 639 reserved for local use) in the Language field and
112 : the entire BCP 47 language tag in the Variant field. The Country field
113 : contains the corresponding ISO 3166 country code _if_ there is one, or
114 : otherwise is empty.
115 :
116 : @param bResolveSystem
117 : If TRUE, resolve an empty language tag denoting the system
118 : locale to the real locale used.
119 : If FALSE, return an empty Locale for such a tag.
120 : */
121 : const com::sun::star::lang::Locale & getLocale( bool bResolveSystem = true ) const;
122 :
123 : /** Obtain mapping to MS-LangID.
124 :
125 : @param bResolveSystem
126 : If TRUE, resolve an empty language tag denoting the system
127 : locale to the real locale used.
128 : If FALSE, return LANGUAGE_SYSTEM for such a tag.
129 : */
130 : LanguageType getLanguageType( bool bResolveSystem = true ) const;
131 :
132 : /** Obtain ISO strings for language, script and country.
133 :
134 : This is a convenience method for ODF export places only. Avoid use in
135 : other code.
136 :
137 : ATTENTION! May return empty strings if the language tag is not
138 : expressible in valid ISO codes!
139 :
140 : @see isIsoODF()
141 :
142 : Always resolves an empty tag to the system locale.
143 : */
144 : void getIsoLanguageScriptCountry( OUString& rLanguage,
145 : OUString& rScript, OUString& rCountry ) const;
146 :
147 : /** Get ISO 639 language code, or BCP 47 language.
148 :
149 : Always resolves an empty tag to the system locale.
150 : */
151 : OUString getLanguage() const;
152 :
153 : /** Get ISO 15924 script code, if not the default script according to
154 : BCP 47. For default script an empty string is returned.
155 :
156 : @see hasScript()
157 :
158 : Always resolves an empty tag to the system locale.
159 : */
160 : OUString getScript() const;
161 :
162 : /** Get combined language and script code, separated by '-' if
163 : non-default script, if default script only language.
164 :
165 : @see hasScript()
166 :
167 : Always resolves an empty tag to the system locale.
168 : */
169 : OUString getLanguageAndScript() const;
170 :
171 : /** Get ISO 3166 country alpha code. Empty if the BCP 47 tags denote a
172 : region not expressible as 2 character country code.
173 :
174 : Always resolves an empty tag to the system locale.
175 : */
176 : OUString getCountry() const;
177 :
178 : /** Get BCP 47 variant subtags, of the IANA Language Subtag Registry.
179 :
180 : If there are multiple variant subtags they are separated by '-'.
181 :
182 : This is NOT related to Locale.Variant!
183 :
184 : Always resolves an empty tag to the system locale.
185 : */
186 : OUString getVariants() const;
187 :
188 : /** Get a GLIBC locale string.
189 :
190 : Always resolves an empty tag to the system locale.
191 :
192 : @param rEncoding
193 : An encoding to be appended to language_country, for example
194 : ".UTF-8" including the dot.
195 :
196 : @return The resulting GLIBC locale string if it could be constructed,
197 : if not an empty string is returned.
198 : */
199 : OUString getGlibcLocaleString( const OUString & rEncoding ) const;
200 :
201 : /** If language tag has a non-default script specified.
202 : */
203 : bool hasScript() const;
204 :
205 : /** If language tag is a locale that can be expressed using only ISO 639
206 : language codes and ISO 3166 country codes, thus is convertible to a
207 : conforming Locale struct without using extension mechanisms.
208 :
209 : Note that an empty language tag or empty Locale::Language field or
210 : LanguageType LANGUAGE_SYSTEM could be treated as a valid ISO locale in
211 : some context, but here it is not. If you want that ask for
212 : aTag.isSystemLocale() || aTag.isIsoLocale()
213 :
214 : Always resolves an empty tag to the system locale.
215 : */
216 : bool isIsoLocale() const;
217 :
218 : /** If language tag is a locale that can be expressed using only ISO 639
219 : language codes and ISO 15924 script codes and ISO 3166 country codes,
220 : thus can be stored in an ODF document using only fo:language, fo:script
221 : and fo:country attributes. If this is FALSE, the locale must be stored
222 : as a <*:rfc-language-tag> element.
223 :
224 : Always resolves an empty tag to the system locale.
225 : */
226 : bool isIsoODF() const;
227 :
228 : /** If this is a valid BCP 47 language tag.
229 :
230 : Always resolves an empty tag to the system locale.
231 :
232 : @see static bool isValidBcp47( const OUString&, OUString*, bool )
233 : */
234 : bool isValidBcp47() const;
235 :
236 : /** If this tag was constructed as an empty tag denoting the system locale.
237 : */
238 99445280 : bool isSystemLocale() const { return mbSystemLocale;}
239 :
240 :
241 : /** Reset with existing BCP 47 language tag string. See ctor. */
242 : LanguageTag & reset( const OUString & rBcp47LanguageTag, bool bCanonicalize = false );
243 :
244 : /** Reset with Locale. */
245 : LanguageTag & reset( const com::sun::star::lang::Locale & rLocale );
246 :
247 : /** Reset with LanguageType MS-LangID. */
248 : LanguageTag & reset( LanguageType nLanguage );
249 :
250 :
251 : /** Fall back to a known locale.
252 :
253 : If the current tag does not represent a known (by us) locale, fall back
254 : to the most likely known locale.
255 : If the current tag is known, no change occurs.
256 : */
257 : LanguageTag & makeFallback();
258 :
259 : /** Return a vector of fall-back strings.
260 :
261 : In order:
262 : full BCP 47 tag, same as getBcp47()
263 : lll-Ssss-CC
264 : lll-Ssss
265 : lll-CC
266 : lll
267 :
268 : If the tag includes variants the order is:
269 : full BCP 47 tag, same as getBcp47()
270 : lll-Ssss-CC-vvvvvvvv
271 : lll-Ssss-vvvvvvvv
272 : lll-Ssss-CC
273 : lll-Ssss
274 : lll-CC-vvvvvvvv
275 : lll-vvvvvvvv
276 : lll-CC
277 : lll
278 :
279 : Only strings that differ from a higher order are included, for example
280 : if there is no script the elements will be bcp47, lll-CC, lll; if the
281 : bcp47 string is identical to lll-CC then only lll-CC, lll.
282 :
283 : Note that lll is only ISO 639-1/2 alpha code and CC is only ISO 3166
284 : alpha code. If the region can not be expressed as ISO 3166 then no -CC
285 : tags are included.
286 :
287 : @param bIncludeFullBcp47
288 : If TRUE, the full BCP 47 tag is included as first element.
289 : If FALSE, the full tag is not included; used if the caller
290 : obtains the fallbacks only if the full tag did not lead to a
291 : match, so subsequent tries need not include it again.
292 : */
293 : ::std::vector< OUString > getFallbackStrings( bool bIncludeFullBcp47 ) const;
294 :
295 :
296 : /** @short Search for an equal or at least for a similar locale in a list
297 : of possible ones.
298 :
299 : @descr First search for a locale that is equal to the reference
300 : locale. (means: same BCP47 string)
301 :
302 : If the reference locale could not be located, check for
303 : "similar" locales, in the same order as obtained by
304 : getFallbackStrings().
305 :
306 : If no similar locale could be located, we search for a locale
307 : "en-US" inside the given locale list.
308 :
309 : If "en-US" could not be located, we search for a locale "en"
310 : inside the given list.
311 :
312 : If no "same" nor any "similar" locale could be found, we try
313 : "x-default" and "x-no-translate" explicitly. Sometimes
314 : variables don't use real localization. For example, in case the
315 : localized value is a fixed product name.
316 :
317 : If no locale matched until then, we use any other locale that
318 : exists inside the set of given ones, namely the first
319 : encountered!
320 :
321 : @param rList
322 : the vector of possible locales as BCP47 strings.
323 :
324 : @param rReference
325 : the reference locale, BCP47 string.
326 :
327 : @return An iterator that points to the found element inside the given
328 : locale list. If no matching locale could be found it points to
329 : the beginning of the list.
330 : */
331 : static ::std::vector< OUString >::const_iterator getFallback( const ::std::vector< OUString > & rList,
332 : const OUString & rReference );
333 :
334 :
335 : /** @short Search for an equal or for a similar locale in a list
336 : of possible ones where at least the language matches.
337 :
338 : @descr First search for a locale that is equal to the reference
339 : locale.
340 :
341 : If the reference locale could not be located, check for
342 : "similar" locales, in the same order as obtained by
343 : getFallbackStrings().
344 :
345 : If no locale matches, rList.end() is returned.
346 :
347 : @param rList
348 : the vector of possible locales.
349 :
350 : @param rReference
351 : the reference locale.
352 :
353 : @return An iterator that points to the found element inside the given
354 : locale list. If no matching locale could be found it points to
355 : the end of the list.
356 : */
357 : static ::std::vector< com::sun::star::lang::Locale >::const_iterator getMatchingFallback(
358 : const ::std::vector< com::sun::star::lang::Locale > & rList,
359 : const com::sun::star::lang::Locale & rReference );
360 :
361 :
362 : /** Test equality of two LanguageTag, possibly resolving system locale.
363 :
364 : @param bResolveSystem
365 : If TRUE, resolve empty language tags denoting the system
366 : locale to the real locale used before comparing.
367 : If FALSE, the behavior is identical to operator==(), system
368 : locales are not resolved first.
369 : */
370 : bool equals( const LanguageTag & rLanguageTag, bool bResolveSystem = false ) const;
371 :
372 : /** Test equality of two LanguageTag.
373 :
374 : Does NOT resolve system, i.e. if the system locale is en-US
375 : LanguageTag("")==LanguageTag("en-US") returns false! Use
376 : equals(...,true) instead if system locales shall be resolved.
377 : */
378 : bool operator==( const LanguageTag & rLanguageTag ) const;
379 :
380 : /** Test inequality of two LanguageTag.
381 :
382 : Does NOT resolve system, i.e. if the system locale is en-US
383 : LanguageTag("")!=LanguageTag("en-US") returns true! Use
384 : !equals(...,true) instead if system locales shall be resolved.
385 : */
386 : bool operator!=( const LanguageTag & rLanguageTag ) const;
387 :
388 : /** Test this LanguageTag less than that LanguageTag.
389 :
390 : For sorted containers. Does NOT resolve system.
391 : */
392 : bool operator<( const LanguageTag & rLanguageTag ) const;
393 :
394 : /** Convert MS-LangID to Locale.
395 :
396 : @param bResolveSystem
397 : If TRUE, resolve an empty language tag denoting the system
398 : locale to the real locale used.
399 : If FALSE, return an empty Locale for such a tag.
400 : */
401 : static com::sun::star::lang::Locale convertToLocale( LanguageType nLangID, bool bResolveSystem = true );
402 :
403 : /** Convert Locale to MS-LangID.
404 :
405 : @param bResolveSystem
406 : If TRUE, resolve an empty language tag denoting the system
407 : locale to the real locale used.
408 : If FALSE, return LANGUAGE_SYSTEM for such a tag.
409 : */
410 : static LanguageType convertToLanguageType( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true );
411 :
412 : /** Convert MS-LangID to BCP 47 string.
413 :
414 : @param bResolveSystem
415 : If TRUE, resolve an empty language tag denoting the system
416 : locale to the real locale used.
417 : If FALSE, return an empty OUString for such a tag.
418 : */
419 : static OUString convertToBcp47( LanguageType nLangID, bool bResolveSystem = true );
420 :
421 : /** Convert Locale to BCP 47 string.
422 :
423 : @param bResolveSystem
424 : If TRUE, resolve an empty language tag denoting the system
425 : locale to the real locale used.
426 : If FALSE, return an empty OUString for such a tag.
427 : */
428 : static OUString convertToBcp47( const com::sun::star::lang::Locale& rLocale, bool bResolveSystem = true );
429 :
430 : /** Convert BCP 47 string to Locale, convenience method.
431 :
432 : NOTE: exists only for consistency with the other convertTo...()
433 : methods, internally uses a temporary LanguageTag instance for
434 : conversion so does not save anything compared to
435 : LanguageTag(rBcp47).getLocale(bResolveSystem).
436 :
437 : @param bResolveSystem
438 : If TRUE, resolve an empty language tag denoting the system
439 : locale to the real locale used.
440 : If FALSE, return an empty Locale for such a tag.
441 : */
442 : static com::sun::star::lang::Locale convertToLocale( const OUString& rBcp47, bool bResolveSystem = true );
443 :
444 : /** Convert BCP 47 string to MS-LangID, convenience method.
445 :
446 : NOTE: exists only for consistency with the other convertTo...()
447 : methods, internally uses a temporary LanguageTag instance for
448 : conversion so does not save anything compared to
449 : LanguageTag(rBcp47).getLanguageType(bResolveSystem).
450 :
451 : @param bResolveSystem
452 : If TRUE, resolve an empty language tag denoting the system
453 : locale to the real locale used.
454 : If FALSE, return LANGUAGE_SYSTEM for such a tag.
455 : */
456 : static LanguageType convertToLanguageType( const OUString& rBcp47, bool bResolveSystem = true );
457 :
458 : /** Convert BCP 47 string to MS-LangID with fallback, convenience method.
459 :
460 : NOTE: exists only for consistency with the other convertTo...()
461 : methods, internally uses a temporary LanguageTag instance for
462 : conversion so does not save anything compared to
463 : LanguageTag(rBcp47).makeFallback().getLanguageType().
464 :
465 : @see makeFallback()
466 :
467 : Always resolves an empty tag to the system locale.
468 : */
469 : static LanguageType convertToLanguageTypeWithFallback( const OUString& rBcp47 );
470 :
471 : /** Convert BCP 47 string to Locale with fallback, convenience method.
472 :
473 : NOTE: exists only for consistency with the other convertTo...()
474 : methods, internally uses a temporary LanguageTag instance for
475 : conversion so does not save anything compared to
476 : LanguageTag(rBcp47).makeFallback().getLocale().
477 :
478 : @see makeFallback()
479 :
480 : Always resolves an empty tag to the system locale.
481 : */
482 : static com::sun::star::lang::Locale convertToLocaleWithFallback( const OUString& rBcp47 );
483 :
484 : /** If rString represents a valid BCP 47 language tag.
485 :
486 : Never resolves an empty tag to the system locale, in fact an empty
487 : string is invalid here. Does not create an instance to be registered
488 : with a conversion to Locale or LanguageType.
489 :
490 : @param o_pCanonicalized
491 : If given and rString is a valid BCP 47 language tag, the
492 : canonicalized form is assigned, which may differ from the
493 : original string even if that was a valid tag. If rString is not
494 : a valid tag, nothing is assigned.
495 :
496 : @param bDisallowPrivate
497 : If TRUE, valid tags according to BCP 47 but reserved for
498 : private use, like 'x-...', are not allowed and FALSE is
499 : returned in this case.
500 : */
501 : static bool isValidBcp47( const OUString& rString, OUString* o_pCanonicalized = NULL,
502 : bool bDisallowPrivate = false );
503 :
504 : /** If nLang is a generated on-the-fly LangID */
505 : static bool isOnTheFlyID( LanguageType nLang );
506 :
507 : /** @ATTENTION: _ONLY_ to be called by the application's configuration! */
508 : static void setConfiguredSystemLanguage( LanguageType nLang );
509 :
510 : typedef std::shared_ptr< LanguageTagImpl > ImplPtr; // shared-ownership pointer to the implementation class
511 :
512 : private:
513 :
514 : mutable com::sun::star::lang::Locale maLocale; // NOTE(review): presumably the Locale handed out by getLocale() - confirm
515 : mutable OUString maBcp47; // NOTE(review): presumably the string handed out by getBcp47() - confirm
516 : mutable LanguageType mnLangID; // NOTE(review): presumably the value handed out by getLanguageType() - confirm
517 : mutable ImplPtr mpImpl; // shared pointer to a LanguageTagImpl
518 : bool mbSystemLocale : 1; // value reported by isSystemLocale()
519 : mutable bool mbInitializedBcp47 : 1; // NOTE(review): presumably marks maBcp47 as filled in - confirm
520 : mutable bool mbInitializedLocale : 1; // NOTE(review): presumably marks maLocale as filled in - confirm
521 : mutable bool mbInitializedLangID : 1; // NOTE(review): presumably marks mnLangID as filled in - confirm
522 : bool mbIsFallback : 1; // NOTE(review): presumably set by makeFallback() - confirm
523 :
524 : ImplPtr getImpl() const;
525 : ImplPtr registerImpl() const;
526 : void syncFromImpl();
527 : void syncVarsFromRawImpl() const;
528 : void syncVarsFromImpl() const;
529 :
530 : void convertLocaleToLang();
531 : void convertBcp47ToLocale();
532 : void convertBcp47ToLang();
533 : void convertLangToLocale();
534 :
535 : void convertFromRtlLocale();
536 :
537 : /** Canonicalize if not yet done and synchronize initialized conversions.
538 :
539 : @return whether BCP 47 language tag string was changed.
540 : */
541 : bool synCanonicalize();
542 :
543 : void resetVars();
544 :
545 : static bool isIsoLanguage( const OUString& rLanguage );
546 : static bool isIsoScript( const OUString& rScript );
547 : static bool isIsoCountry( const OUString& rRegion );
548 :
549 : };
550 :
551 : #endif // INCLUDED_I18NLANGTAG_LANGUAGETAG_HXX
552 :
553 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|