LCOV - code coverage report
Current view: top level - i18nutil/source/utility - unicode.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 46 378 12.2 %
Date: 2015-06-13 12:38:46 Functions: 11 12 91.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <boost/scoped_ptr.hpp>
      21             : #include <com/sun/star/i18n/UnicodeType.hpp>
      22             : #include <com/sun/star/i18n/KCharacterType.hpp>
      23             : #include <com/sun/star/i18n/ScriptType.hpp>
      24             : #include <i18nlangtag/languagetag.hxx>
      25             : #include <i18nlangtag/languagetagicu.hxx>
      26             : #include <i18nutil/unicode.hxx>
      27             : #include <sal/log.hxx>
      28             : #include <unicode/numfmt.h>
      29             : #include "unicode_data.h"
      30             : 
      31             : // Workaround for glibc braindamage:
      32             : // glibc 2.4's langinfo.h does "#define CURRENCY_SYMBOL __CURRENCY_SYMBOL"
      33             : // which (obviously) breaks UnicodeType::CURRENCY_SYMBOL
      34             : #undef CURRENCY_SYMBOL
      35             : 
      36             : using namespace ::com::sun::star::i18n;
      37             : 
      38             : static const ScriptTypeList defaultTypeList[] = {
      39             :     { UnicodeScript_kBasicLatin,
      40             :       UnicodeScript_kBasicLatin,
      41             :       UnicodeScript_kBasicLatin },      // 0,
      42             :     { UnicodeScript_kLatin1Supplement,
      43             :       UnicodeScript_kLatin1Supplement,
      44             :       UnicodeScript_kLatin1Supplement },// 1,
      45             :     { UnicodeScript_kLatinExtendedA,
      46             :       UnicodeScript_kLatinExtendedA,
      47             :       UnicodeScript_kLatinExtendedA }, // 2,
      48             :     { UnicodeScript_kLatinExtendedB,
      49             :       UnicodeScript_kLatinExtendedB,
      50             :       UnicodeScript_kLatinExtendedB }, // 3,
      51             :     { UnicodeScript_kIPAExtension,
      52             :       UnicodeScript_kIPAExtension,
      53             :       UnicodeScript_kIPAExtension }, // 4,
      54             :     { UnicodeScript_kSpacingModifier,
      55             :       UnicodeScript_kSpacingModifier,
      56             :       UnicodeScript_kSpacingModifier }, // 5,
      57             :     { UnicodeScript_kCombiningDiacritical,
      58             :       UnicodeScript_kCombiningDiacritical,
      59             :       UnicodeScript_kCombiningDiacritical }, // 6,
      60             :     { UnicodeScript_kGreek,
      61             :       UnicodeScript_kGreek,
      62             :       UnicodeScript_kGreek }, // 7,
      63             :     { UnicodeScript_kCyrillic,
      64             :       UnicodeScript_kCyrillic,
      65             :       UnicodeScript_kCyrillic }, // 8,
      66             :     { UnicodeScript_kArmenian,
      67             :       UnicodeScript_kArmenian,
      68             :       UnicodeScript_kArmenian }, // 9,
      69             :     { UnicodeScript_kHebrew,
      70             :       UnicodeScript_kHebrew,
      71             :       UnicodeScript_kHebrew }, // 10,
      72             :     { UnicodeScript_kArabic,
      73             :       UnicodeScript_kArabic,
      74             :       UnicodeScript_kArabic }, // 11,
      75             :     { UnicodeScript_kSyriac,
      76             :       UnicodeScript_kSyriac,
      77             :       UnicodeScript_kSyriac }, // 12,
      78             :     { UnicodeScript_kThaana,
      79             :       UnicodeScript_kThaana,
      80             :       UnicodeScript_kThaana }, // 13,
      81             :     { UnicodeScript_kDevanagari,
      82             :       UnicodeScript_kDevanagari,
      83             :       UnicodeScript_kDevanagari }, // 14,
      84             :     { UnicodeScript_kBengali,
      85             :       UnicodeScript_kBengali,
      86             :       UnicodeScript_kBengali }, // 15,
      87             :     { UnicodeScript_kGurmukhi,
      88             :       UnicodeScript_kGurmukhi,
      89             :       UnicodeScript_kGurmukhi }, // 16,
      90             :     { UnicodeScript_kGujarati,
      91             :       UnicodeScript_kGujarati,
      92             :       UnicodeScript_kGujarati }, // 17,
      93             :     { UnicodeScript_kOriya,
      94             :       UnicodeScript_kOriya,
      95             :       UnicodeScript_kOriya }, // 18,
      96             :     { UnicodeScript_kTamil,
      97             :       UnicodeScript_kTamil,
      98             :       UnicodeScript_kTamil }, // 19,
      99             :     { UnicodeScript_kTelugu,
     100             :       UnicodeScript_kTelugu,
     101             :       UnicodeScript_kTelugu }, // 20,
     102             :     { UnicodeScript_kKannada,
     103             :       UnicodeScript_kKannada,
     104             :       UnicodeScript_kKannada }, // 21,
     105             :     { UnicodeScript_kMalayalam,
     106             :       UnicodeScript_kMalayalam,
     107             :       UnicodeScript_kMalayalam }, // 22,
     108             :     { UnicodeScript_kSinhala,
     109             :       UnicodeScript_kSinhala,
     110             :       UnicodeScript_kSinhala }, // 23,
     111             :     { UnicodeScript_kThai,
     112             :       UnicodeScript_kThai,
     113             :       UnicodeScript_kThai }, // 24,
     114             :     { UnicodeScript_kLao,
     115             :       UnicodeScript_kLao,
     116             :       UnicodeScript_kLao }, // 25,
     117             :     { UnicodeScript_kTibetan,
     118             :       UnicodeScript_kTibetan,
     119             :       UnicodeScript_kTibetan }, // 26,
     120             :     { UnicodeScript_kMyanmar,
     121             :       UnicodeScript_kMyanmar,
     122             :       UnicodeScript_kMyanmar }, // 27,
     123             :     { UnicodeScript_kGeorgian,
     124             :       UnicodeScript_kGeorgian,
     125             :       UnicodeScript_kGeorgian }, // 28,
     126             :     { UnicodeScript_kHangulJamo,
     127             :       UnicodeScript_kHangulJamo,
     128             :       UnicodeScript_kHangulJamo }, // 29,
     129             :     { UnicodeScript_kEthiopic,
     130             :       UnicodeScript_kEthiopic,
     131             :       UnicodeScript_kEthiopic }, // 30,
     132             :     { UnicodeScript_kCherokee,
     133             :       UnicodeScript_kCherokee,
     134             :       UnicodeScript_kCherokee }, // 31,
     135             :     { UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
     136             :       UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
     137             :       UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32,
     138             :     { UnicodeScript_kOgham,
     139             :       UnicodeScript_kOgham,
     140             :       UnicodeScript_kOgham }, // 33,
     141             :     { UnicodeScript_kRunic,
     142             :       UnicodeScript_kRunic,
     143             :       UnicodeScript_kRunic }, // 34,
     144             :     { UnicodeScript_kKhmer,
     145             :       UnicodeScript_kKhmer,
     146             :       UnicodeScript_kKhmer }, // 35,
     147             :     { UnicodeScript_kMongolian,
     148             :       UnicodeScript_kMongolian,
     149             :       UnicodeScript_kMongolian }, // 36,
     150             :     { UnicodeScript_kLatinExtendedAdditional,
     151             :       UnicodeScript_kLatinExtendedAdditional,
     152             :       UnicodeScript_kLatinExtendedAdditional }, // 37,
     153             :     { UnicodeScript_kGreekExtended,
     154             :       UnicodeScript_kGreekExtended,
     155             :       UnicodeScript_kGreekExtended }, // 38,
     156             :     { UnicodeScript_kGeneralPunctuation,
     157             :       UnicodeScript_kGeneralPunctuation,
     158             :       UnicodeScript_kGeneralPunctuation }, // 39,
     159             :     { UnicodeScript_kSuperSubScript,
     160             :       UnicodeScript_kSuperSubScript,
     161             :       UnicodeScript_kSuperSubScript }, // 40,
     162             :     { UnicodeScript_kCurrencySymbolScript,
     163             :       UnicodeScript_kCurrencySymbolScript,
     164             :       UnicodeScript_kCurrencySymbolScript }, // 41,
     165             :     { UnicodeScript_kSymbolCombiningMark,
     166             :       UnicodeScript_kSymbolCombiningMark,
     167             :       UnicodeScript_kSymbolCombiningMark }, // 42,
     168             :     { UnicodeScript_kLetterlikeSymbol,
     169             :       UnicodeScript_kLetterlikeSymbol,
     170             :       UnicodeScript_kLetterlikeSymbol }, // 43,
     171             :     { UnicodeScript_kNumberForm,
     172             :       UnicodeScript_kNumberForm,
     173             :       UnicodeScript_kNumberForm }, // 44,
     174             :     { UnicodeScript_kArrow,
     175             :       UnicodeScript_kArrow,
     176             :       UnicodeScript_kArrow }, // 45,
     177             :     { UnicodeScript_kMathOperator,
     178             :       UnicodeScript_kMathOperator,
     179             :       UnicodeScript_kMathOperator }, // 46,
     180             :     { UnicodeScript_kMiscTechnical,
     181             :       UnicodeScript_kMiscTechnical,
     182             :       UnicodeScript_kMiscTechnical }, // 47,
     183             :     { UnicodeScript_kControlPicture,
     184             :       UnicodeScript_kControlPicture,
     185             :       UnicodeScript_kControlPicture }, // 48,
     186             :     { UnicodeScript_kOpticalCharacter,
     187             :       UnicodeScript_kOpticalCharacter,
     188             :       UnicodeScript_kOpticalCharacter }, // 49,
     189             :     { UnicodeScript_kEnclosedAlphanumeric,
     190             :       UnicodeScript_kEnclosedAlphanumeric,
     191             :       UnicodeScript_kEnclosedAlphanumeric }, // 50,
     192             :     { UnicodeScript_kBoxDrawing,
     193             :       UnicodeScript_kBoxDrawing,
     194             :       UnicodeScript_kBoxDrawing }, // 51,
     195             :     { UnicodeScript_kBlockElement,
     196             :       UnicodeScript_kBlockElement,
     197             :       UnicodeScript_kBlockElement }, // 52,
     198             :     { UnicodeScript_kGeometricShape,
     199             :       UnicodeScript_kGeometricShape,
     200             :       UnicodeScript_kGeometricShape }, // 53,
     201             :     { UnicodeScript_kMiscSymbol,
     202             :       UnicodeScript_kMiscSymbol,
     203             :       UnicodeScript_kMiscSymbol }, // 54,
     204             :     { UnicodeScript_kDingbat,
     205             :       UnicodeScript_kDingbat,
     206             :       UnicodeScript_kDingbat }, // 55,
     207             :     { UnicodeScript_kBraillePatterns,
     208             :       UnicodeScript_kBraillePatterns,
     209             :       UnicodeScript_kBraillePatterns }, // 56,
     210             :     { UnicodeScript_kCJKRadicalsSupplement,
     211             :       UnicodeScript_kCJKRadicalsSupplement,
     212             :       UnicodeScript_kCJKRadicalsSupplement }, // 57,
     213             :     { UnicodeScript_kKangxiRadicals,
     214             :       UnicodeScript_kKangxiRadicals,
     215             :       UnicodeScript_kKangxiRadicals }, // 58,
     216             :     { UnicodeScript_kIdeographicDescriptionCharacters,
     217             :       UnicodeScript_kIdeographicDescriptionCharacters,
     218             :       UnicodeScript_kIdeographicDescriptionCharacters }, // 59,
     219             :     { UnicodeScript_kCJKSymbolPunctuation,
     220             :       UnicodeScript_kCJKSymbolPunctuation,
     221             :       UnicodeScript_kCJKSymbolPunctuation }, // 60,
     222             :     { UnicodeScript_kHiragana,
     223             :       UnicodeScript_kHiragana,
     224             :       UnicodeScript_kHiragana }, // 61,
     225             :     { UnicodeScript_kKatakana,
     226             :       UnicodeScript_kKatakana,
     227             :       UnicodeScript_kKatakana }, // 62,
     228             :     { UnicodeScript_kBopomofo,
     229             :       UnicodeScript_kBopomofo,
     230             :       UnicodeScript_kBopomofo }, // 63,
     231             :     { UnicodeScript_kHangulCompatibilityJamo,
     232             :       UnicodeScript_kHangulCompatibilityJamo,
     233             :       UnicodeScript_kHangulCompatibilityJamo }, // 64,
     234             :     { UnicodeScript_kKanbun,
     235             :       UnicodeScript_kKanbun,
     236             :       UnicodeScript_kKanbun }, // 65,
     237             :     { UnicodeScript_kBopomofoExtended,
     238             :       UnicodeScript_kBopomofoExtended,
     239             :       UnicodeScript_kBopomofoExtended }, // 66,
     240             :     { UnicodeScript_kEnclosedCJKLetterMonth,
     241             :       UnicodeScript_kEnclosedCJKLetterMonth,
     242             :       UnicodeScript_kEnclosedCJKLetterMonth }, // 67,
     243             :     { UnicodeScript_kCJKCompatibility,
     244             :       UnicodeScript_kCJKCompatibility,
     245             :       UnicodeScript_kCJKCompatibility }, // 68,
     246             :     { UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
     247             :       UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
     248             :       UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69,
     249             :     { UnicodeScript_kCJKUnifiedIdeograph,
     250             :       UnicodeScript_kCJKUnifiedIdeograph,
     251             :       UnicodeScript_kCJKUnifiedIdeograph }, // 70,
     252             :     { UnicodeScript_kYiSyllables,
     253             :       UnicodeScript_kYiSyllables,
     254             :       UnicodeScript_kYiSyllables }, // 71,
     255             :     { UnicodeScript_kYiRadicals,
     256             :       UnicodeScript_kYiRadicals,
     257             :       UnicodeScript_kYiRadicals }, // 72,
     258             :     { UnicodeScript_kHangulSyllable,
     259             :       UnicodeScript_kHangulSyllable,
     260             :       UnicodeScript_kHangulSyllable }, // 73,
     261             :     { UnicodeScript_kHighSurrogate,
     262             :       UnicodeScript_kHighSurrogate,
     263             :       UnicodeScript_kHighSurrogate }, // 74,
     264             :     { UnicodeScript_kHighPrivateUseSurrogate,
     265             :       UnicodeScript_kHighPrivateUseSurrogate,
     266             :       UnicodeScript_kHighPrivateUseSurrogate }, // 75,
     267             :     { UnicodeScript_kLowSurrogate,
     268             :       UnicodeScript_kLowSurrogate,
     269             :       UnicodeScript_kLowSurrogate }, // 76,
     270             :     { UnicodeScript_kPrivateUse,
     271             :       UnicodeScript_kPrivateUse,
     272             :       UnicodeScript_kPrivateUse }, // 77,
     273             :     { UnicodeScript_kCJKCompatibilityIdeograph,
     274             :       UnicodeScript_kCJKCompatibilityIdeograph,
     275             :       UnicodeScript_kCJKCompatibilityIdeograph }, // 78,
     276             :     { UnicodeScript_kAlphabeticPresentation,
     277             :       UnicodeScript_kAlphabeticPresentation,
     278             :       UnicodeScript_kAlphabeticPresentation }, // 79,
     279             :     { UnicodeScript_kArabicPresentationA,
     280             :       UnicodeScript_kArabicPresentationA,
     281             :       UnicodeScript_kArabicPresentationA }, // 80,
     282             :     { UnicodeScript_kCombiningHalfMark,
     283             :       UnicodeScript_kCombiningHalfMark,
     284             :       UnicodeScript_kCombiningHalfMark }, // 81,
     285             :     { UnicodeScript_kCJKCompatibilityForm,
     286             :       UnicodeScript_kCJKCompatibilityForm,
     287             :       UnicodeScript_kCJKCompatibilityForm }, // 82,
     288             :     { UnicodeScript_kSmallFormVariant,
     289             :       UnicodeScript_kSmallFormVariant,
     290             :       UnicodeScript_kSmallFormVariant }, // 83,
     291             :     { UnicodeScript_kArabicPresentationB,
     292             :       UnicodeScript_kArabicPresentationB,
     293             :       UnicodeScript_kArabicPresentationB }, // 84,
     294             :     { UnicodeScript_kNoScript,
     295             :       UnicodeScript_kNoScript,
     296             :       UnicodeScript_kNoScript }, // 85,
     297             :     { UnicodeScript_kHalfwidthFullwidthForm,
     298             :       UnicodeScript_kHalfwidthFullwidthForm,
     299             :       UnicodeScript_kHalfwidthFullwidthForm }, // 86,
     300             :     { UnicodeScript_kScriptCount,
     301             :       UnicodeScript_kScriptCount,
     302             :       UnicodeScript_kNoScript } // 87,
     303             : };
     304             : 
     305             : sal_Int16 SAL_CALL
     306        6056 : unicode::getUnicodeScriptType( const sal_Unicode ch, const ScriptTypeList* typeList, sal_Int16 unknownType ) {
     307             : 
     308        6056 :     if (!typeList) {
     309           0 :         typeList = defaultTypeList;
     310           0 :         unknownType = UnicodeScript_kNoScript;
     311             :     }
     312             : 
     313        6056 :     sal_Int16 i = 0, type = typeList[0].to;
     314       12959 :     while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) {
     315         847 :         type = typeList[++i].to;
     316             :     }
     317             : 
     318        5857 :     return (type < UnicodeScript_kScriptCount &&
     319        5857 :             ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ?
     320        6119 :             typeList[i].value : unknownType;
     321             : }
     322             : 
     323             : sal_Unicode SAL_CALL
     324          10 : unicode::getUnicodeScriptStart( UnicodeScript type) {
     325          10 :     return UnicodeScriptType[type][UnicodeScriptTypeFrom];
     326             : }
     327             : 
     328             : sal_Unicode SAL_CALL
     329          10 : unicode::getUnicodeScriptEnd( UnicodeScript type) {
     330          10 :     return UnicodeScriptType[type][UnicodeScriptTypeTo];
     331             : }
     332             : 
     333             : sal_Int16 SAL_CALL
     334      127550 : unicode::getUnicodeType( const sal_Unicode ch ) {
     335             :     static sal_Unicode c = 0x00;
     336             :     static sal_Int16 r = 0x00;
     337             : 
     338      127550 :     if (ch == c) return r;
     339       25483 :     else c = ch;
     340             : 
     341       25483 :     sal_Int16 address = UnicodeTypeIndex[ch >> 8];
     342           0 :     return r = (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] :
     343       25483 :         UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]);
     344             : }
     345             : 
     346             : sal_uInt8 SAL_CALL
     347           0 : unicode::getUnicodeDirection( const sal_Unicode ch ) {
     348             :     static sal_Unicode c = 0x00;
     349             :     static sal_uInt8 r = 0x00;
     350             : 
     351           0 :     if (ch == c) return r;
     352           0 :     else c = ch;
     353             : 
     354           0 :     sal_Int16 address = UnicodeDirectionIndex[ch >> 8];
     355           0 :     return r = ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] :
     356           0 :         UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]);
     357             : 
     358             : }
     359             : 
     360             : #define bit(name)   (1U << name)
     361             : 
     362             : #define UPPERMASK   bit(UnicodeType::UPPERCASE_LETTER)
     363             : 
     364             : #define LOWERMASK   bit(UnicodeType::LOWERCASE_LETTER)
     365             : 
     366             : #define TITLEMASK   bit(UnicodeType::TITLECASE_LETTER)
     367             : 
     368             : #define ALPHAMASK   UPPERMASK|LOWERMASK|TITLEMASK|\
     369             :             bit(UnicodeType::MODIFIER_LETTER)|\
     370             :             bit(UnicodeType::OTHER_LETTER)
     371             : 
     372             : #define SPACEMASK   bit(UnicodeType::SPACE_SEPARATOR)|\
     373             :             bit(UnicodeType::LINE_SEPARATOR)|\
     374             :             bit(UnicodeType::PARAGRAPH_SEPARATOR)
     375             : 
     376             : #define CONTROLMASK bit(UnicodeType::CONTROL)|\
     377             :             bit(UnicodeType::FORMAT)|\
     378             :             bit(UnicodeType::LINE_SEPARATOR)|\
     379             :             bit(UnicodeType::PARAGRAPH_SEPARATOR)
     380             : 
     381             : #define IsType(func, mask)  \
     382             : bool SAL_CALL func( const sal_Unicode ch) {\
     383             :     return (bit(getUnicodeType(ch)) & (mask)) != 0;\
     384             : }
     385             : 
     386       63457 : IsType(unicode::isControl, CONTROLMASK)
     387       64061 : IsType(unicode::isAlpha, ALPHAMASK)
     388          20 : IsType(unicode::isSpace, SPACEMASK)
     389             : 
     390             : #define CONTROLSPACE    bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
     391             :             bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)
     392             : 
     393          20 : bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) {
     394          20 :     return (ch != 0xa0 && isSpace(ch)) || (ch <= 0x1F && (bit(ch) & (CONTROLSPACE)));
     395             : }
     396             : 
     397      284417 : sal_Int16 SAL_CALL unicode::getScriptClassFromUScriptCode(UScriptCode eScript)
     398             : {
     399             :     //See unicode/uscript.h
     400             :     static const sal_Int16 scriptTypes[] =
     401             :     {
     402             :         ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
     403             :         ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
     404             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
     405             :     // 15
     406             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
     407             :         ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
     408             :         ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
     409             :     // 30
     410             :         ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
     411             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     412             :         ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     413             :     // 45
     414             :         ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
     415             :         ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
     416             :         ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     417             :     // 60
     418             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     419             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
     420             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
     421             :     // 75
     422             :         ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     423             :         ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     424             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     425             :     // 90
     426             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     427             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     428             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
     429             :     // 105
     430             :         ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     431             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     432             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
     433             :     // 120
     434             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     435             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
     436             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     437             :     // 135
     438             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     439             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     440             :         ScriptType::COMPLEX,
     441             :         ScriptType::WEAK
     442             :     };
     443             : 
     444             :     sal_Int16 nRet;
     445      284417 :     if (eScript < USCRIPT_COMMON)
     446           0 :         nRet = ScriptType::WEAK;
     447      284417 :     else if (static_cast<size_t>(eScript) >= SAL_N_ELEMENTS(scriptTypes))
     448           0 :         nRet = ScriptType::COMPLEX;         // anything new is going to be pretty wild
     449             :     else
     450      284417 :         nRet = scriptTypes[eScript];
     451      284417 :     return nRet;
     452             : }
     453             : 
     454          14 : OString SAL_CALL unicode::getExemplarLanguageForUScriptCode(UScriptCode eScript)
     455             : {
     456          14 :     OString sRet;
     457          14 :     switch (eScript)
     458             :     {
     459             :         case USCRIPT_CODE_LIMIT:
     460             :         case USCRIPT_INVALID_CODE:
     461           0 :             sRet = "zxx";
     462           0 :             break;
     463             :         case USCRIPT_COMMON:
     464             :         case USCRIPT_INHERITED:
     465           0 :             sRet = "und";
     466           0 :             break;
     467             :         case USCRIPT_MATHEMATICAL_NOTATION:
     468             :         case USCRIPT_SYMBOLS:
     469           0 :             sRet = "zxx";
     470           0 :             break;
     471             :         case USCRIPT_UNWRITTEN_LANGUAGES:
     472             :         case USCRIPT_UNKNOWN:
     473           0 :             sRet = "und";
     474           0 :             break;
     475             :         case USCRIPT_ARABIC:
     476           0 :             sRet = "ar";
     477           0 :             break;
     478             :         case USCRIPT_ARMENIAN:
     479           0 :             sRet = "hy";
     480           0 :             break;
     481             :         case USCRIPT_BENGALI:
     482           0 :             sRet = "bn";
     483           0 :             break;
     484             :         case USCRIPT_BOPOMOFO:
     485           0 :             sRet = "zh";
     486           0 :             break;
     487             :         case USCRIPT_CHEROKEE:
     488           0 :             sRet = "chr";
     489           0 :             break;
     490             :         case USCRIPT_COPTIC:
     491           0 :             sRet = "cop";
     492           0 :             break;
     493             :         case USCRIPT_CYRILLIC:
     494           0 :             sRet = "ru";
     495           0 :             break;
     496             :         case USCRIPT_DESERET:
     497           0 :             sRet = "en";
     498           0 :             break;
     499             :         case USCRIPT_DEVANAGARI:
     500           0 :             sRet = "hi";
     501           0 :             break;
     502             :         case USCRIPT_ETHIOPIC:
     503           0 :             sRet = "am";
     504           0 :             break;
     505             :         case USCRIPT_GEORGIAN:
     506           0 :             sRet = "ka";
     507           0 :             break;
     508             :         case USCRIPT_GOTHIC:
     509           0 :             sRet = "got";
     510           0 :             break;
     511             :         case USCRIPT_GREEK:
     512           0 :             sRet = "el";
     513           0 :             break;
     514             :         case USCRIPT_GUJARATI:
     515           0 :             sRet = "gu";
     516           0 :             break;
     517             :         case USCRIPT_GURMUKHI:
     518           0 :             sRet = "pa";
     519           0 :             break;
     520             :         case USCRIPT_HAN:
     521          14 :             sRet = "zh";
     522          14 :             break;
     523             :         case USCRIPT_HANGUL:
     524           0 :             sRet = "ko";
     525           0 :             break;
     526             :         case USCRIPT_HEBREW:
     527           0 :             sRet = "hr";
     528           0 :             break;
     529             :         case USCRIPT_HIRAGANA:
     530           0 :             sRet = "ja";
     531           0 :             break;
     532             :         case USCRIPT_KANNADA:
     533           0 :             sRet = "kn";
     534           0 :             break;
     535             :         case USCRIPT_KATAKANA:
     536           0 :             sRet = "ja";
     537           0 :             break;
     538             :         case USCRIPT_KHMER:
     539           0 :             sRet = "km";
     540           0 :             break;
     541             :         case USCRIPT_LAO:
     542           0 :             sRet = "lo";
     543           0 :             break;
     544             :         case USCRIPT_LATIN:
     545           0 :             sRet = "en";
     546           0 :             break;
     547             :         case USCRIPT_MALAYALAM:
     548           0 :             sRet = "ml";
     549           0 :             break;
     550             :         case USCRIPT_MONGOLIAN:
     551           0 :             sRet = "mn";
     552           0 :             break;
     553             :         case USCRIPT_MYANMAR:
     554           0 :             sRet = "my";
     555           0 :             break;
     556             :         case USCRIPT_OGHAM:
     557           0 :             sRet = "pgl";
     558           0 :             break;
     559             :         case USCRIPT_OLD_ITALIC:
     560           0 :             sRet = "osc";
     561           0 :             break;
     562             :         case USCRIPT_ORIYA:
     563           0 :             sRet = "or";
     564           0 :             break;
     565             :         case USCRIPT_RUNIC:
     566           0 :             sRet = "ang";
     567           0 :             break;
     568             :         case USCRIPT_SINHALA:
     569           0 :             sRet = "si";
     570           0 :             break;
     571             :         case USCRIPT_SYRIAC:
     572           0 :             sRet = "syr";
     573           0 :             break;
     574             :         case USCRIPT_TAMIL:
     575           0 :             sRet = "ta";
     576           0 :             break;
     577             :         case USCRIPT_TELUGU:
     578           0 :             sRet = "te";
     579           0 :             break;
     580             :         case USCRIPT_THAANA:
     581           0 :             sRet = "dv";
     582           0 :             break;
     583             :         case USCRIPT_THAI:
     584           0 :             sRet = "th";
     585           0 :             break;
     586             :         case USCRIPT_TIBETAN:
     587           0 :             sRet = "bo";
     588           0 :             break;
     589             :         case USCRIPT_CANADIAN_ABORIGINAL:
     590           0 :             sRet = "iu";
     591           0 :             break;
     592             :         case USCRIPT_YI:
     593           0 :             sRet = "ii";
     594           0 :             break;
     595             :         case USCRIPT_TAGALOG:
     596           0 :             sRet = "tl";
     597           0 :             break;
     598             :         case USCRIPT_HANUNOO:
     599           0 :             sRet = "hnn";
     600           0 :             break;
     601             :         case USCRIPT_BUHID:
     602           0 :             sRet = "bku";
     603           0 :             break;
     604             :         case USCRIPT_TAGBANWA:
     605           0 :             sRet = "tbw";
     606           0 :             break;
     607             :         case USCRIPT_BRAILLE:
     608           0 :             sRet = "en";
     609           0 :             break;
     610             :         case USCRIPT_CYPRIOT:
     611           0 :             sRet = "ecy";
     612           0 :             break;
     613             :         case USCRIPT_LIMBU:
     614           0 :             sRet = "lif";
     615           0 :             break;
     616             :         case USCRIPT_LINEAR_B:
     617           0 :             sRet = "gmy";
     618           0 :             break;
     619             :         case USCRIPT_OSMANYA:
     620           0 :             sRet = "so";
     621           0 :             break;
     622             :         case USCRIPT_SHAVIAN:
     623           0 :             sRet = "en";
     624           0 :             break;
     625             :         case USCRIPT_TAI_LE:
     626           0 :             sRet = "tdd";
     627           0 :             break;
     628             :         case USCRIPT_UGARITIC:
     629           0 :             sRet = "uga";
     630           0 :             break;
     631             :         case USCRIPT_KATAKANA_OR_HIRAGANA:
     632           0 :             sRet = "ja";
     633           0 :             break;
     634             :         case USCRIPT_BUGINESE:
     635           0 :             sRet = "bug";
     636           0 :             break;
     637             :         case USCRIPT_GLAGOLITIC:
     638           0 :             sRet = "ch";
     639           0 :             break;
     640             :         case USCRIPT_KHAROSHTHI:
     641           0 :             sRet = "pra";
     642           0 :             break;
     643             :         case USCRIPT_SYLOTI_NAGRI:
     644           0 :             sRet = "syl";
     645           0 :             break;
     646             :         case USCRIPT_NEW_TAI_LUE:
     647           0 :             sRet = "khb";
     648           0 :             break;
     649             :         case USCRIPT_TIFINAGH:
     650           0 :             sRet = "tmh";
     651           0 :             break;
     652             :         case USCRIPT_OLD_PERSIAN:
     653           0 :             sRet = "peo";
     654           0 :             break;
     655             :         case USCRIPT_BALINESE:
     656           0 :             sRet = "ban";
     657           0 :             break;
     658             :         case USCRIPT_BATAK:
     659           0 :             sRet = "btk";
     660           0 :             break;
     661             :         case USCRIPT_BLISSYMBOLS:
     662           0 :             sRet = "en";
     663           0 :             break;
     664             :         case USCRIPT_BRAHMI:
     665           0 :             sRet = "pra";
     666           0 :             break;
     667             :         case USCRIPT_CHAM:
     668           0 :             sRet = "cja";
     669           0 :             break;
     670             :         case USCRIPT_CIRTH:
     671           0 :             sRet = "sjn";
     672           0 :             break;
     673             :         case USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC:
     674           0 :             sRet = "cu";
     675           0 :             break;
     676             :         case USCRIPT_DEMOTIC_EGYPTIAN:
     677             :         case USCRIPT_HIERATIC_EGYPTIAN:
     678             :         case USCRIPT_EGYPTIAN_HIEROGLYPHS:
     679           0 :             sRet = "egy";
     680           0 :             break;
     681             :         case USCRIPT_KHUTSURI:
     682           0 :             sRet = "ka";
     683           0 :             break;
     684             :         case USCRIPT_SIMPLIFIED_HAN:
     685           0 :             sRet = "zh";
     686           0 :             break;
     687             :         case USCRIPT_TRADITIONAL_HAN:
     688           0 :             sRet = "zh";
     689           0 :             break;
     690             :         case USCRIPT_PAHAWH_HMONG:
     691           0 :             sRet = "blu";
     692           0 :             break;
     693             :         case USCRIPT_OLD_HUNGARIAN:
     694           0 :             sRet = "ohu";
     695           0 :             break;
     696             :         case USCRIPT_HARAPPAN_INDUS:
     697           0 :             sRet = "xiv";
     698           0 :             break;
     699             :         case USCRIPT_JAVANESE:
     700           0 :             sRet = "kaw";
     701           0 :             break;
     702             :         case USCRIPT_KAYAH_LI:
     703           0 :             sRet = "eky";
     704           0 :             break;
     705             :         case USCRIPT_LATIN_FRAKTUR:
     706           0 :             sRet = "de";
     707           0 :             break;
     708             :         case USCRIPT_LATIN_GAELIC:
     709           0 :             sRet = "ga";
     710           0 :             break;
     711             :         case USCRIPT_LEPCHA:
     712           0 :             sRet = "lep";
     713           0 :             break;
     714             :         case USCRIPT_LINEAR_A:
     715           0 :             sRet = "ecr";
     716           0 :             break;
     717             :         case USCRIPT_MAYAN_HIEROGLYPHS:
     718           0 :             sRet = "myn";
     719           0 :             break;
     720             :         case USCRIPT_MEROITIC:
     721           0 :             sRet = "xmr";
     722           0 :             break;
     723             :         case USCRIPT_NKO:
     724           0 :             sRet = "nqo";
     725           0 :             break;
     726             :         case USCRIPT_ORKHON:
     727           0 :             sRet = "otk";
     728           0 :             break;
     729             :         case USCRIPT_OLD_PERMIC:
     730           0 :             sRet = "kv";
     731           0 :             break;
     732             :         case USCRIPT_PHAGS_PA:
     733           0 :             sRet = "xng";
     734           0 :             break;
     735             :         case USCRIPT_PHOENICIAN:
     736           0 :             sRet = "phn";
     737           0 :             break;
     738             :         case USCRIPT_PHONETIC_POLLARD:
     739           0 :             sRet = "hmd";
     740           0 :             break;
     741             :         case USCRIPT_RONGORONGO:
     742           0 :             sRet = "rap";
     743           0 :             break;
     744             :         case USCRIPT_SARATI:
     745           0 :             sRet = "qya";
     746           0 :             break;
     747             :         case USCRIPT_ESTRANGELO_SYRIAC:
     748           0 :             sRet = "syr";
     749           0 :             break;
     750             :         case USCRIPT_WESTERN_SYRIAC:
     751           0 :             sRet = "tru";
     752           0 :             break;
     753             :         case USCRIPT_EASTERN_SYRIAC:
     754           0 :             sRet = "aii";
     755           0 :             break;
     756             :         case USCRIPT_TENGWAR:
     757           0 :             sRet = "sjn";
     758           0 :             break;
     759             :         case USCRIPT_VAI:
     760           0 :             sRet = "vai";
     761           0 :             break;
     762             :         case USCRIPT_VISIBLE_SPEECH:
     763           0 :             sRet = "en";
     764           0 :             break;
     765             :         case USCRIPT_CUNEIFORM:
     766           0 :             sRet = "akk";
     767           0 :             break;
     768             :         case USCRIPT_CARIAN:
     769           0 :             sRet = "xcr";
     770           0 :             break;
     771             :         case USCRIPT_JAPANESE:
     772           0 :             sRet = "ja";
     773           0 :             break;
     774             :         case USCRIPT_LANNA:
     775           0 :             sRet = "nod";
     776           0 :             break;
     777             :         case USCRIPT_LYCIAN:
     778           0 :             sRet = "xlc";
     779           0 :             break;
     780             :         case USCRIPT_LYDIAN:
     781           0 :             sRet = "xld";
     782           0 :             break;
     783             :         case USCRIPT_OL_CHIKI:
     784           0 :             sRet = "sat";
     785           0 :             break;
     786             :         case USCRIPT_REJANG:
     787           0 :             sRet = "rej";
     788           0 :             break;
     789             :         case USCRIPT_SAURASHTRA:
     790           0 :             sRet = "saz";
     791           0 :             break;
     792             :         case USCRIPT_SIGN_WRITING:
     793           0 :             sRet = "en";
     794           0 :             break;
     795             :         case USCRIPT_SUNDANESE:
     796           0 :             sRet = "su";
     797           0 :             break;
     798             :         case USCRIPT_MOON:
     799           0 :             sRet = "en";
     800           0 :             break;
     801             :         case USCRIPT_MEITEI_MAYEK:
     802           0 :             sRet = "mni";
     803           0 :             break;
     804             :         case USCRIPT_IMPERIAL_ARAMAIC:
     805           0 :             sRet = "arc";
     806           0 :             break;
     807             :         case USCRIPT_AVESTAN:
     808           0 :             sRet = "ae";
     809           0 :             break;
     810             :         case USCRIPT_CHAKMA:
     811           0 :             sRet = "ccp";
     812           0 :             break;
     813             :         case USCRIPT_KOREAN:
     814           0 :             sRet = "ko";
     815           0 :             break;
     816             :         case USCRIPT_KAITHI:
     817           0 :             sRet = "awa";
     818           0 :             break;
     819             :         case USCRIPT_MANICHAEAN:
     820           0 :             sRet = "xmn";
     821           0 :             break;
     822             :         case USCRIPT_INSCRIPTIONAL_PAHLAVI:
     823             :         case USCRIPT_PSALTER_PAHLAVI:
     824             :         case USCRIPT_BOOK_PAHLAVI:
     825             :         case USCRIPT_INSCRIPTIONAL_PARTHIAN:
     826           0 :             sRet = "xpr";
     827           0 :             break;
     828             :         case USCRIPT_SAMARITAN:
     829           0 :             sRet = "heb";
     830           0 :             break;
     831             :         case USCRIPT_TAI_VIET:
     832           0 :             sRet = "blt";
     833           0 :             break;
     834             :         case USCRIPT_MANDAEAN: /* Aliased to USCRIPT_MANDAIC in icu 4.6. */
     835           0 :             sRet = "mic";
     836           0 :             break;
     837             : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 4)
     838             :         case USCRIPT_NABATAEAN: //no language with an assigned code yet
     839           0 :             sRet = "mis";
     840           0 :             break;
     841             :         case USCRIPT_PALMYRENE: //no language with an assigned code yet
     842           0 :             sRet = "mis";
     843           0 :             break;
     844             :         case USCRIPT_BAMUM:
     845           0 :             sRet = "bax";
     846           0 :             break;
     847             :         case USCRIPT_LISU:
     848           0 :             sRet = "lis";
     849           0 :             break;
     850             :         case USCRIPT_NAKHI_GEBA:
     851           0 :             sRet = "nxq";
     852           0 :             break;
     853             :         case USCRIPT_OLD_SOUTH_ARABIAN:
     854           0 :             sRet = "xsa";
     855           0 :             break;
     856             :         case USCRIPT_BASSA_VAH:
     857           0 :             sRet = "bsq";
     858           0 :             break;
     859             :         case USCRIPT_DUPLOYAN_SHORTAND:
     860           0 :             sRet = "fr";
     861           0 :             break;
     862             :         case USCRIPT_ELBASAN:
     863           0 :             sRet = "sq";
     864           0 :             break;
     865             :         case USCRIPT_GRANTHA:
     866           0 :             sRet = "ta";
     867           0 :             break;
     868             :         case USCRIPT_KPELLE:
     869           0 :             sRet = "kpe";
     870           0 :             break;
     871             :         case USCRIPT_LOMA:
     872           0 :             sRet = "lom";
     873           0 :             break;
     874             :         case USCRIPT_MENDE:
     875           0 :             sRet = "men";
     876           0 :             break;
     877             :         case USCRIPT_MEROITIC_CURSIVE:
     878           0 :             sRet = "xmr";
     879           0 :             break;
     880             :         case USCRIPT_OLD_NORTH_ARABIAN:
     881           0 :             sRet = "xna";
     882           0 :             break;
     883             :         case USCRIPT_SINDHI:
     884           0 :             sRet = "sd";
     885           0 :             break;
     886             :         case USCRIPT_WARANG_CITI:
     887           0 :             sRet = "hoc";
     888           0 :             break;
     889             : #endif
     890             : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 8)
     891             :         case USCRIPT_AFAKA:
     892           0 :             sRet = "djk";
     893           0 :             break;
     894             :         case USCRIPT_JURCHEN:
     895           0 :             sRet = "juc";
     896           0 :             break;
     897             :         case USCRIPT_MRO:
     898           0 :             sRet = "cmr";
     899           0 :             break;
     900             :         case USCRIPT_NUSHU: //no language with an assigned code yet
     901           0 :             sRet = "mis";
     902           0 :             break;
     903             :         case USCRIPT_SHARADA:
     904           0 :             sRet = "sa";
     905           0 :             break;
     906             :         case USCRIPT_SORA_SOMPENG:
     907           0 :             sRet = "srb";
     908           0 :             break;
     909             :         case USCRIPT_TAKRI:
     910           0 :             sRet = "doi";
     911           0 :             break;
     912             :         case USCRIPT_TANGUT:
     913           0 :             sRet = "txg";
     914           0 :             break;
     915             :         case USCRIPT_WOLEAI:
     916           0 :             sRet = "woe";
     917           0 :             break;
     918             : #endif
     919             : #if (U_ICU_VERSION_MAJOR_NUM >= 49)
     920             :         case USCRIPT_ANATOLIAN_HIEROGLYPHS:
     921           0 :             sRet = "hlu";
     922           0 :             break;
     923             :         case USCRIPT_KHOJKI:
     924           0 :             sRet = "gu";
     925           0 :             break;
     926             :         case USCRIPT_TIRHUTA:
     927           0 :             sRet = "mai";
     928           0 :             break;
     929             : #endif
     930             : #if (U_ICU_VERSION_MAJOR_NUM >= 52)
     931             :         case USCRIPT_CAUCASIAN_ALBANIAN:
     932           0 :             sRet = "xag";
     933           0 :             break;
     934             :         case USCRIPT_MAHAJANI:
     935           0 :             sRet = "mwr";
     936           0 :             break;
     937             : #endif
     938             : #if (U_ICU_VERSION_MAJOR_NUM >= 54)
     939             :         case USCRIPT_AHOM:
     940           0 :             sRet = "aho";
     941           0 :             break;
     942             :         case USCRIPT_HATRAN:
     943           0 :             sRet = "qly-Hatr";
     944           0 :             break;
     945             :         case USCRIPT_MODI:
     946           0 :             sRet = "mr-Modi";
     947           0 :             break;
     948             :         case USCRIPT_MULTANI:
     949           0 :             sRet = "skr-Mutl";
     950           0 :             break;
     951             :         case USCRIPT_PAU_CIN_HAU:
     952           0 :             sRet = "ctd-Pauc";
     953           0 :             break;
     954             :         case USCRIPT_SIDDHAM:
     955           0 :             sRet = "sa-Sidd";
     956           0 :             break;
     957             : #endif
     958             :     }
     959          14 :     return sRet;
     960             : }
     961             : 
     962             : //Format dNumber as a percentage string according to the rules of the language in rLangTag, e.g. 100 -> "100%" for en-US vs "100 %" for de-DE.
     963             : //dNumber is divided by 100 before being handed to ICU's percent formatter; if that formatter cannot be created, OUString::number(dNumber) + "%" is returned instead.
     964        1017 : OUString SAL_CALL unicode::formatPercent(double dNumber,
     965             :     const LanguageTag &rLangTag)
     966             : {
     967             :     // ICU status code, starts out as "no error"; checked below after the percent formatter is created
     968        1017 :     UErrorCode errorCode=U_ZERO_ERROR;
     969             : 
     970        1017 :     LanguageTag aLangTag(rLangTag);
     971             : 
     972             :     // As of CLDR Version 24 these languages (es, sl) were not listed as using
     973             :     // spacing between number and %, but our l10n groups report that they do, see
     974             :     // http://www.unicode.org/cldr/charts/24/by_type/numbers.number_formatting_patterns.html
     975             :     // so format them using French (fr-FR), which has the desired rules
     976        1017 :     if (aLangTag.getLanguage() == "es" || aLangTag.getLanguage() == "sl")
     977           0 :         aLangTag = LanguageTag("fr-FR");
     978             : 
     979        2034 :     icu::Locale aLocale = LanguageTagIcu::getIcuLocale(aLangTag);
     980             : 
     981             :     boost::scoped_ptr<NumberFormat> xF(
     982        2034 :         NumberFormat::createPercentInstance(aLocale, errorCode));
     983        1017 :     if(U_FAILURE(errorCode))
     984             :     {
     985             :         SAL_WARN("i18n", "NumberFormat::createPercentInstance failed");
     986           0 :         return OUString::number(dNumber) + "%";
     987             :     }
     988             : 
     989        2034 :     UnicodeString output;
     990        1017 :     xF->format(dNumber/100, output);
     991        1017 :     OUString aRet(reinterpret_cast<const sal_Unicode *>(output.getBuffer()),
     992        3051 :         output.length());
     993        1017 :     if (rLangTag.getLanguage() == "de")
     994             :     {
     995             :         //German: use narrow no-break space (U+202F) instead of (normal) no-break space (U+00A0); note the check reads the caller's rLangTag, not aLangTag
     996           0 :         return aRet.replace(0x00A0, 0x202F);
     997             :     }
     998        2034 :     return aRet;
     999             : }
    1000             : 
    1001             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11