LCOV - code coverage report
Current view: top level - usr/local/src/libreoffice/i18nutil/source/utility - unicode.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 26 346 7.5 %
Date: 2013-07-09 Functions: 9 11 81.8 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <com/sun/star/i18n/UnicodeType.hpp>
      21             : #include <com/sun/star/i18n/KCharacterType.hpp>
      22             : #include <com/sun/star/i18n/ScriptType.hpp>
      23             : #include <i18nutil/unicode.hxx>
      24             : #include "unicode_data.h"
      25             : 
      26             : // Workaround for glibc braindamage:
      27             : // glibc 2.4's langinfo.h does "#define CURRENCY_SYMBOL __CURRENCY_SYMBOL"
      28             : // which (obviously) breaks UnicodeType::CURRENCY_SYMBOL
      29             : #undef CURRENCY_SYMBOL
      30             : 
      31             : using namespace ::com::sun::star::i18n;
      32             : 
      33             : static ScriptTypeList defaultTypeList[] = { // Fallback table that getUnicodeScriptType() uses when its typeList argument is null; every row but the last names the same UnicodeScript block three times. NOTE(review): field order is presumably {from, to, value} - confirm against the ScriptTypeList declaration.
      34             :     { UnicodeScript_kBasicLatin,
      35             :       UnicodeScript_kBasicLatin,
      36             :       UnicodeScript_kBasicLatin },      // 0,
      37             :     { UnicodeScript_kLatin1Supplement,
      38             :       UnicodeScript_kLatin1Supplement,
      39             :       UnicodeScript_kLatin1Supplement },// 1,
      40             :     { UnicodeScript_kLatinExtendedA,
      41             :       UnicodeScript_kLatinExtendedA,
      42             :       UnicodeScript_kLatinExtendedA }, // 2,
      43             :     { UnicodeScript_kLatinExtendedB,
      44             :       UnicodeScript_kLatinExtendedB,
      45             :       UnicodeScript_kLatinExtendedB }, // 3,
      46             :     { UnicodeScript_kIPAExtension,
      47             :       UnicodeScript_kIPAExtension,
      48             :       UnicodeScript_kIPAExtension }, // 4,
      49             :     { UnicodeScript_kSpacingModifier,
      50             :       UnicodeScript_kSpacingModifier,
      51             :       UnicodeScript_kSpacingModifier }, // 5,
      52             :     { UnicodeScript_kCombiningDiacritical,
      53             :       UnicodeScript_kCombiningDiacritical,
      54             :       UnicodeScript_kCombiningDiacritical }, // 6,
      55             :     { UnicodeScript_kGreek,
      56             :       UnicodeScript_kGreek,
      57             :       UnicodeScript_kGreek }, // 7,
      58             :     { UnicodeScript_kCyrillic,
      59             :       UnicodeScript_kCyrillic,
      60             :       UnicodeScript_kCyrillic }, // 8,
      61             :     { UnicodeScript_kArmenian,
      62             :       UnicodeScript_kArmenian,
      63             :       UnicodeScript_kArmenian }, // 9,
      64             :     { UnicodeScript_kHebrew,
      65             :       UnicodeScript_kHebrew,
      66             :       UnicodeScript_kHebrew }, // 10,
      67             :     { UnicodeScript_kArabic,
      68             :       UnicodeScript_kArabic,
      69             :       UnicodeScript_kArabic }, // 11,
      70             :     { UnicodeScript_kSyriac,
      71             :       UnicodeScript_kSyriac,
      72             :       UnicodeScript_kSyriac }, // 12,
      73             :     { UnicodeScript_kThaana,
      74             :       UnicodeScript_kThaana,
      75             :       UnicodeScript_kThaana }, // 13,
      76             :     { UnicodeScript_kDevanagari,
      77             :       UnicodeScript_kDevanagari,
      78             :       UnicodeScript_kDevanagari }, // 14,
      79             :     { UnicodeScript_kBengali,
      80             :       UnicodeScript_kBengali,
      81             :       UnicodeScript_kBengali }, // 15,
      82             :     { UnicodeScript_kGurmukhi,
      83             :       UnicodeScript_kGurmukhi,
      84             :       UnicodeScript_kGurmukhi }, // 16,
      85             :     { UnicodeScript_kGujarati,
      86             :       UnicodeScript_kGujarati,
      87             :       UnicodeScript_kGujarati }, // 17,
      88             :     { UnicodeScript_kOriya,
      89             :       UnicodeScript_kOriya,
      90             :       UnicodeScript_kOriya }, // 18,
      91             :     { UnicodeScript_kTamil,
      92             :       UnicodeScript_kTamil,
      93             :       UnicodeScript_kTamil }, // 19,
      94             :     { UnicodeScript_kTelugu,
      95             :       UnicodeScript_kTelugu,
      96             :       UnicodeScript_kTelugu }, // 20,
      97             :     { UnicodeScript_kKannada,
      98             :       UnicodeScript_kKannada,
      99             :       UnicodeScript_kKannada }, // 21,
     100             :     { UnicodeScript_kMalayalam,
     101             :       UnicodeScript_kMalayalam,
     102             :       UnicodeScript_kMalayalam }, // 22,
     103             :     { UnicodeScript_kSinhala,
     104             :       UnicodeScript_kSinhala,
     105             :       UnicodeScript_kSinhala }, // 23,
     106             :     { UnicodeScript_kThai,
     107             :       UnicodeScript_kThai,
     108             :       UnicodeScript_kThai }, // 24,
     109             :     { UnicodeScript_kLao,
     110             :       UnicodeScript_kLao,
     111             :       UnicodeScript_kLao }, // 25,
     112             :     { UnicodeScript_kTibetan,
     113             :       UnicodeScript_kTibetan,
     114             :       UnicodeScript_kTibetan }, // 26,
     115             :     { UnicodeScript_kMyanmar,
     116             :       UnicodeScript_kMyanmar,
     117             :       UnicodeScript_kMyanmar }, // 27,
     118             :     { UnicodeScript_kGeorgian,
     119             :       UnicodeScript_kGeorgian,
     120             :       UnicodeScript_kGeorgian }, // 28,
     121             :     { UnicodeScript_kHangulJamo,
     122             :       UnicodeScript_kHangulJamo,
     123             :       UnicodeScript_kHangulJamo }, // 29,
     124             :     { UnicodeScript_kEthiopic,
     125             :       UnicodeScript_kEthiopic,
     126             :       UnicodeScript_kEthiopic }, // 30,
     127             :     { UnicodeScript_kCherokee,
     128             :       UnicodeScript_kCherokee,
     129             :       UnicodeScript_kCherokee }, // 31,
     130             :     { UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
     131             :       UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
     132             :       UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32,
     133             :     { UnicodeScript_kOgham,
     134             :       UnicodeScript_kOgham,
     135             :       UnicodeScript_kOgham }, // 33,
     136             :     { UnicodeScript_kRunic,
     137             :       UnicodeScript_kRunic,
     138             :       UnicodeScript_kRunic }, // 34,
     139             :     { UnicodeScript_kKhmer,
     140             :       UnicodeScript_kKhmer,
     141             :       UnicodeScript_kKhmer }, // 35,
     142             :     { UnicodeScript_kMongolian,
     143             :       UnicodeScript_kMongolian,
     144             :       UnicodeScript_kMongolian }, // 36,
     145             :     { UnicodeScript_kLatinExtendedAdditional,
     146             :       UnicodeScript_kLatinExtendedAdditional,
     147             :       UnicodeScript_kLatinExtendedAdditional }, // 37,
     148             :     { UnicodeScript_kGreekExtended,
     149             :       UnicodeScript_kGreekExtended,
     150             :       UnicodeScript_kGreekExtended }, // 38,
     151             :     { UnicodeScript_kGeneralPunctuation,
     152             :       UnicodeScript_kGeneralPunctuation,
     153             :       UnicodeScript_kGeneralPunctuation }, // 39,
     154             :     { UnicodeScript_kSuperSubScript,
     155             :       UnicodeScript_kSuperSubScript,
     156             :       UnicodeScript_kSuperSubScript }, // 40,
     157             :     { UnicodeScript_kCurrencySymbolScript,
     158             :       UnicodeScript_kCurrencySymbolScript,
     159             :       UnicodeScript_kCurrencySymbolScript }, // 41,
     160             :     { UnicodeScript_kSymbolCombiningMark,
     161             :       UnicodeScript_kSymbolCombiningMark,
     162             :       UnicodeScript_kSymbolCombiningMark }, // 42,
     163             :     { UnicodeScript_kLetterlikeSymbol,
     164             :       UnicodeScript_kLetterlikeSymbol,
     165             :       UnicodeScript_kLetterlikeSymbol }, // 43,
     166             :     { UnicodeScript_kNumberForm,
     167             :       UnicodeScript_kNumberForm,
     168             :       UnicodeScript_kNumberForm }, // 44,
     169             :     { UnicodeScript_kArrow,
     170             :       UnicodeScript_kArrow,
     171             :       UnicodeScript_kArrow }, // 45,
     172             :     { UnicodeScript_kMathOperator,
     173             :       UnicodeScript_kMathOperator,
     174             :       UnicodeScript_kMathOperator }, // 46,
     175             :     { UnicodeScript_kMiscTechnical,
     176             :       UnicodeScript_kMiscTechnical,
     177             :       UnicodeScript_kMiscTechnical }, // 47,
     178             :     { UnicodeScript_kControlPicture,
     179             :       UnicodeScript_kControlPicture,
     180             :       UnicodeScript_kControlPicture }, // 48,
     181             :     { UnicodeScript_kOpticalCharacter,
     182             :       UnicodeScript_kOpticalCharacter,
     183             :       UnicodeScript_kOpticalCharacter }, // 49,
     184             :     { UnicodeScript_kEnclosedAlphanumeric,
     185             :       UnicodeScript_kEnclosedAlphanumeric,
     186             :       UnicodeScript_kEnclosedAlphanumeric }, // 50,
     187             :     { UnicodeScript_kBoxDrawing,
     188             :       UnicodeScript_kBoxDrawing,
     189             :       UnicodeScript_kBoxDrawing }, // 51,
     190             :     { UnicodeScript_kBlockElement,
     191             :       UnicodeScript_kBlockElement,
     192             :       UnicodeScript_kBlockElement }, // 52,
     193             :     { UnicodeScript_kGeometricShape,
     194             :       UnicodeScript_kGeometricShape,
     195             :       UnicodeScript_kGeometricShape }, // 53,
     196             :     { UnicodeScript_kMiscSymbol,
     197             :       UnicodeScript_kMiscSymbol,
     198             :       UnicodeScript_kMiscSymbol }, // 54,
     199             :     { UnicodeScript_kDingbat,
     200             :       UnicodeScript_kDingbat,
     201             :       UnicodeScript_kDingbat }, // 55,
     202             :     { UnicodeScript_kBraillePatterns,
     203             :       UnicodeScript_kBraillePatterns,
     204             :       UnicodeScript_kBraillePatterns }, // 56,
     205             :     { UnicodeScript_kCJKRadicalsSupplement,
     206             :       UnicodeScript_kCJKRadicalsSupplement,
     207             :       UnicodeScript_kCJKRadicalsSupplement }, // 57,
     208             :     { UnicodeScript_kKangxiRadicals,
     209             :       UnicodeScript_kKangxiRadicals,
     210             :       UnicodeScript_kKangxiRadicals }, // 58,
     211             :     { UnicodeScript_kIdeographicDescriptionCharacters,
     212             :       UnicodeScript_kIdeographicDescriptionCharacters,
     213             :       UnicodeScript_kIdeographicDescriptionCharacters }, // 59,
     214             :     { UnicodeScript_kCJKSymbolPunctuation,
     215             :       UnicodeScript_kCJKSymbolPunctuation,
     216             :       UnicodeScript_kCJKSymbolPunctuation }, // 60,
     217             :     { UnicodeScript_kHiragana,
     218             :       UnicodeScript_kHiragana,
     219             :       UnicodeScript_kHiragana }, // 61,
     220             :     { UnicodeScript_kKatakana,
     221             :       UnicodeScript_kKatakana,
     222             :       UnicodeScript_kKatakana }, // 62,
     223             :     { UnicodeScript_kBopomofo,
     224             :       UnicodeScript_kBopomofo,
     225             :       UnicodeScript_kBopomofo }, // 63,
     226             :     { UnicodeScript_kHangulCompatibilityJamo,
     227             :       UnicodeScript_kHangulCompatibilityJamo,
     228             :       UnicodeScript_kHangulCompatibilityJamo }, // 64,
     229             :     { UnicodeScript_kKanbun,
     230             :       UnicodeScript_kKanbun,
     231             :       UnicodeScript_kKanbun }, // 65,
     232             :     { UnicodeScript_kBopomofoExtended,
     233             :       UnicodeScript_kBopomofoExtended,
     234             :       UnicodeScript_kBopomofoExtended }, // 66,
     235             :     { UnicodeScript_kEnclosedCJKLetterMonth,
     236             :       UnicodeScript_kEnclosedCJKLetterMonth,
     237             :       UnicodeScript_kEnclosedCJKLetterMonth }, // 67,
     238             :     { UnicodeScript_kCJKCompatibility,
     239             :       UnicodeScript_kCJKCompatibility,
     240             :       UnicodeScript_kCJKCompatibility }, // 68,
     241             :     { UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
     242             :       UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
     243             :       UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69,
     244             :     { UnicodeScript_kCJKUnifiedIdeograph,
     245             :       UnicodeScript_kCJKUnifiedIdeograph,
     246             :       UnicodeScript_kCJKUnifiedIdeograph }, // 70,
     247             :     { UnicodeScript_kYiSyllables,
     248             :       UnicodeScript_kYiSyllables,
     249             :       UnicodeScript_kYiSyllables }, // 71,
     250             :     { UnicodeScript_kYiRadicals,
     251             :       UnicodeScript_kYiRadicals,
     252             :       UnicodeScript_kYiRadicals }, // 72,
     253             :     { UnicodeScript_kHangulSyllable,
     254             :       UnicodeScript_kHangulSyllable,
     255             :       UnicodeScript_kHangulSyllable }, // 73,
     256             :     { UnicodeScript_kHighSurrogate,
     257             :       UnicodeScript_kHighSurrogate,
     258             :       UnicodeScript_kHighSurrogate }, // 74,
     259             :     { UnicodeScript_kHighPrivateUseSurrogate,
     260             :       UnicodeScript_kHighPrivateUseSurrogate,
     261             :       UnicodeScript_kHighPrivateUseSurrogate }, // 75,
     262             :     { UnicodeScript_kLowSurrogate,
     263             :       UnicodeScript_kLowSurrogate,
     264             :       UnicodeScript_kLowSurrogate }, // 76,
     265             :     { UnicodeScript_kPrivateUse,
     266             :       UnicodeScript_kPrivateUse,
     267             :       UnicodeScript_kPrivateUse }, // 77,
     268             :     { UnicodeScript_kCJKCompatibilityIdeograph,
     269             :       UnicodeScript_kCJKCompatibilityIdeograph,
     270             :       UnicodeScript_kCJKCompatibilityIdeograph }, // 78,
     271             :     { UnicodeScript_kAlphabeticPresentation,
     272             :       UnicodeScript_kAlphabeticPresentation,
     273             :       UnicodeScript_kAlphabeticPresentation }, // 79,
     274             :     { UnicodeScript_kArabicPresentationA,
     275             :       UnicodeScript_kArabicPresentationA,
     276             :       UnicodeScript_kArabicPresentationA }, // 80,
     277             :     { UnicodeScript_kCombiningHalfMark,
     278             :       UnicodeScript_kCombiningHalfMark,
     279             :       UnicodeScript_kCombiningHalfMark }, // 81,
     280             :     { UnicodeScript_kCJKCompatibilityForm,
     281             :       UnicodeScript_kCJKCompatibilityForm,
     282             :       UnicodeScript_kCJKCompatibilityForm }, // 82,
     283             :     { UnicodeScript_kSmallFormVariant,
     284             :       UnicodeScript_kSmallFormVariant,
     285             :       UnicodeScript_kSmallFormVariant }, // 83,
     286             :     { UnicodeScript_kArabicPresentationB,
     287             :       UnicodeScript_kArabicPresentationB,
     288             :       UnicodeScript_kArabicPresentationB }, // 84,
     289             :     { UnicodeScript_kNoScript,
     290             :       UnicodeScript_kNoScript,
     291             :       UnicodeScript_kNoScript }, // 85,
     292             :     { UnicodeScript_kHalfwidthFullwidthForm,
     293             :       UnicodeScript_kHalfwidthFullwidthForm,
     294             :       UnicodeScript_kHalfwidthFullwidthForm }, // 86,
     295             :     { UnicodeScript_kScriptCount, // terminator row: getUnicodeScriptType() stops scanning once a row's `to` is no longer below UnicodeScript_kScriptCount
     296             :       UnicodeScript_kScriptCount,
     297             :       UnicodeScript_kNoScript } // 87, the terminator reports UnicodeScript_kNoScript as its value
     298             : };
     299             : 
     300             : sal_Int16 SAL_CALL
     301          18 : unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) { // Classifies ch against the rows of typeList; returns the matching row's `value`, or unknownType when no row covers ch.
     302             :     // A null typeList selects defaultTypeList and replaces unknownType with UnicodeScript_kNoScript.
     303          18 :     if (!typeList) {
     304           0 :         typeList = defaultTypeList;
     305           0 :         unknownType = UnicodeScript_kNoScript;
     306             :     }
     307             :     // Advance through the rows while ch lies beyond the end of the block named by the row's `to`. The scan only stops on its own if the list ends with a row whose `to` is not below UnicodeScript_kScriptCount.
     308          18 :     sal_Int16 i = 0, type = typeList[0].to;
     309          36 :     while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) {
     310           0 :         type = typeList[++i].to;
     311             :     }
     312             :     // A hit needs a non-terminator row and ch at or above the start of the block named by that row's `from`.
     313          18 :     return (type < UnicodeScript_kScriptCount &&
     314          18 :             ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ?
     315          30 :             typeList[i].value : unknownType;
     316             : }
     317             : 
     318             : sal_Unicode SAL_CALL
     319          20 : unicode::getUnicodeScriptStart( UnicodeScript type) { // Returns the UnicodeScriptTypeFrom column of the UnicodeScriptType row for `type`.
     320          20 :     return UnicodeScriptType[type][UnicodeScriptTypeFrom]; // `type` is used directly as the row index; no range check is done here
     321             : }
     322             : 
     323             : sal_Unicode SAL_CALL
     324          20 : unicode::getUnicodeScriptEnd( UnicodeScript type) { // Returns the UnicodeScriptTypeTo column of the UnicodeScriptType row for `type`.
     325          20 :     return UnicodeScriptType[type][UnicodeScriptTypeTo]; // `type` is used directly as the row index; no range check is done here
     326             : }
     327             : 
     328             : sal_Int16 SAL_CALL
     329       10924 : unicode::getUnicodeType( const sal_Unicode ch ) { // Returns the type value stored for ch in the UnicodeTypeIndex / UnicodeTypeBlockValue / UnicodeTypeValue tables, with a one-entry cache for repeated lookups of the same ch.
     330             :     static sal_Unicode c = 0x00; // code unit of the most recent lookup
     331             :     static sal_Int16 r = 0x00; // result cached for c; only meaningful once bCached is true
     332             :     static bool bCached = false; // guards the cache: without it a first call with ch == 0 matched the initial c and returned the placeholder 0 without consulting the tables
     333       10924 :     if (bCached && ch == c) return r;
     334        3593 :     else { c = ch; bCached = true; }
     335             :     // NOTE(review): the statics are shared and unsynchronized, so concurrent callers can still race on this cache.
     336        3593 :     sal_Int16 address = UnicodeTypeIndex[ch >> 8]; // the high byte of ch selects the index entry
     337           0 :     return r = (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] :
     338        3593 :         UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]); // otherwise the low byte indexes into a 256-entry slice of UnicodeTypeValue
     339             : }
     340             : 
     341             : sal_uInt8 SAL_CALL
     342           0 : unicode::getUnicodeDirection( const sal_Unicode ch ) { // Returns the direction value stored for ch in the UnicodeDirectionIndex / UnicodeDirectionBlockValue / UnicodeDirectionValue tables, with a one-entry cache for repeated lookups of the same ch.
     343             :     static sal_Unicode c = 0x00; // code unit of the most recent lookup
     344             :     static sal_uInt8 r = 0x00; // result cached for c; only meaningful once bCached is true
     345             :     static bool bCached = false; // guards the cache: without it a first call with ch == 0 matched the initial c and returned the placeholder 0 without consulting the tables
     346           0 :     if (bCached && ch == c) return r;
     347           0 :     else { c = ch; bCached = true; }
     348             :     // NOTE(review): the statics are shared and unsynchronized, so concurrent callers can still race on this cache.
     349           0 :     sal_Int16 address = UnicodeDirectionIndex[ch >> 8]; // the high byte of ch selects the index entry
     350           0 :     return r = ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] :
     351           0 :         UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]); // otherwise the low byte indexes into a 256-entry slice of UnicodeDirectionValue
     352             : 
     353             : }
     354             : 
     355             : #define bit(name)   (1 << (name)) // one-bit mask for the value `name`; the argument is parenthesized so an expression argument cannot regroup around <<
     356             : 
     357             : #define UPPERMASK   bit(UnicodeType::UPPERCASE_LETTER)
     358             : 
     359             : #define LOWERMASK   bit(UnicodeType::LOWERCASE_LETTER)
     360             : 
     361             : #define TITLEMASK   bit(UnicodeType::TITLECASE_LETTER)
     362             : // The composite masks below are wrapped in parentheses so each expands to a single operand wherever it is used.
     363             : #define ALPHAMASK   (UPPERMASK|LOWERMASK|TITLEMASK|\
     364             :             bit(UnicodeType::MODIFIER_LETTER)|\
     365             :             bit(UnicodeType::OTHER_LETTER))
     366             : 
     367             : #define SPACEMASK   (bit(UnicodeType::SPACE_SEPARATOR)|\
     368             :             bit(UnicodeType::LINE_SEPARATOR)|\
     369             :             bit(UnicodeType::PARAGRAPH_SEPARATOR))
     370             : 
     371             : #define CONTROLMASK (bit(UnicodeType::CONTROL)|\
     372             :             bit(UnicodeType::FORMAT)|\
     373             :             bit(UnicodeType::LINE_SEPARATOR)|\
     374             :             bit(UnicodeType::PARAGRAPH_SEPARATOR))
     375             : // IsType(func, mask) defines func(ch), which is true when the bit for getUnicodeType(ch) is set in mask.
     376             : #define IsType(func, mask)  \
     377             : sal_Bool SAL_CALL func( const sal_Unicode ch) {\
     378             :     return (bit(getUnicodeType(ch)) & (mask)) != 0;\
     379             : }
     380             : 
     381        5052 : IsType(unicode::isControl, CONTROLMASK) // unicode::isControl(ch): type of ch is CONTROL, FORMAT, LINE_SEPARATOR or PARAGRAPH_SEPARATOR
     382        5820 : IsType(unicode::isAlpha, ALPHAMASK) // unicode::isAlpha(ch): type of ch is an upper/lower/title-case, modifier or other letter
     383          40 : IsType(unicode::isSpace, SPACEMASK) // unicode::isSpace(ch): type of ch is SPACE_SEPARATOR, LINE_SEPARATOR or PARAGRAPH_SEPARATOR
     384             : 
     385             : #define CONTROLSPACE    bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
     386             :             bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f) // CONTROLSPACE: one bit per code unit 0x09-0x0d and 0x1c-0x1f; it is unparenthesized, so expand it as (CONTROLSPACE)
     387             :     // isWhiteSpace(ch): true for anything isSpace() accepts except 0xa0, and for the code units listed in CONTROLSPACE.
     388          40 : sal_Bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) {
     389          40 :     return (ch != 0xa0 && isSpace(ch)) || (ch <= 0x1F && (bit(ch) & (CONTROLSPACE))); // the ch <= 0x1F test keeps the shift count inside the range CONTROLSPACE covers
     390             : }
     391             : 
     392        7712 : sal_Int16 SAL_CALL unicode::getScriptClassFromUScriptCode(UScriptCode eScript) // Maps a UScriptCode to a ScriptType class (WEAK, LATIN, ASIAN or COMPLEX).
     393             : {
     394             :     // Lookup table indexed by the numeric UScriptCode value; see unicode/uscript.h for the order of the codes.
     395             :     static sal_Int16 scriptTypes[] =
     396             :     {
     397             :         ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
     398             :         ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
     399             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
     400             :     // 15
     401             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
     402             :         ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
     403             :         ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
     404             :     // 30
     405             :         ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
     406             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     407             :         ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     408             :     // 45
     409             :         ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
     410             :         ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
     411             :         ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     412             :     // 60
     413             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     414             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
     415             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
     416             :     // 75
     417             :         ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     418             :         ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     419             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     420             :     // 90
     421             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     422             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     423             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
     424             :     // 105
     425             :         ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     426             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     427             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
     428             :     // 120
     429             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     430             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
     431             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     432             :     // 135
     433             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     434             :         ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
     435             :         ScriptType::COMPLEX,
     436             :         ScriptType::WEAK
     437             :     };
     438             :     // Codes below USCRIPT_COMMON are WEAK, codes past the end of the table fall back to COMPLEX, everything else is a direct table lookup.
     439             :     sal_Int16 nRet;
     440        7712 :     if (eScript < USCRIPT_COMMON)
     441           0 :         nRet = ScriptType::WEAK;
     442        7712 :     else if (static_cast<size_t>(eScript) >= SAL_N_ELEMENTS(scriptTypes))
     443           0 :         nRet = ScriptType::COMPLEX;         // anything new is going to be pretty wild
     444             :     else
     445        7712 :         nRet = scriptTypes[eScript];
     446        7712 :     return nRet;
     447             : }
     448             : 
     449           0 : OString SAL_CALL unicode::getExemplerLanguageForUScriptCode(UScriptCode eScript)
     450             : {
     451           0 :     OString sRet;
     452           0 :     switch (eScript)
     453             :     {
     454             :         case USCRIPT_CODE_LIMIT:
     455             :         case USCRIPT_INVALID_CODE:
     456           0 :             sRet = "zxx";
     457           0 :             break;
     458             :         case USCRIPT_COMMON:
     459             :         case USCRIPT_INHERITED:
     460           0 :             sRet = "und";
     461           0 :             break;
     462             :         case USCRIPT_MATHEMATICAL_NOTATION:
     463             :         case USCRIPT_SYMBOLS:
     464           0 :             sRet = "zxx";
     465           0 :             break;
     466             :         case USCRIPT_UNWRITTEN_LANGUAGES:
     467             :         case USCRIPT_UNKNOWN:
     468           0 :             sRet = "und";
     469           0 :             break;
     470             :         case USCRIPT_ARABIC:
     471           0 :             sRet = "ar";
     472           0 :             break;
     473             :         case USCRIPT_ARMENIAN:
     474           0 :             sRet = "hy";
     475           0 :             break;
     476             :         case USCRIPT_BENGALI:
     477           0 :             sRet = "bn";
     478           0 :             break;
     479             :         case USCRIPT_BOPOMOFO:
     480           0 :             sRet = "zh";
     481           0 :             break;
     482             :         case USCRIPT_CHEROKEE:
     483           0 :             sRet = "chr";
     484           0 :             break;
     485             :         case USCRIPT_COPTIC:
     486           0 :             sRet = "cop";
     487           0 :             break;
     488             :         case USCRIPT_CYRILLIC:
     489           0 :             sRet = "ru";
     490           0 :             break;
     491             :         case USCRIPT_DESERET:
     492           0 :             sRet = "en";
     493           0 :             break;
     494             :         case USCRIPT_DEVANAGARI:
     495           0 :             sRet = "hi";
     496           0 :             break;
     497             :         case USCRIPT_ETHIOPIC:
     498           0 :             sRet = "am";
     499           0 :             break;
     500             :         case USCRIPT_GEORGIAN:
     501           0 :             sRet = "ka";
     502           0 :             break;
     503             :         case USCRIPT_GOTHIC:
     504           0 :             sRet = "got";
     505           0 :             break;
     506             :         case USCRIPT_GREEK:
     507           0 :             sRet = "el";
     508           0 :             break;
     509             :         case USCRIPT_GUJARATI:
     510           0 :             sRet = "gu";
     511           0 :             break;
     512             :         case USCRIPT_GURMUKHI:
     513           0 :             sRet = "pa";
     514           0 :             break;
     515             :         case USCRIPT_HAN:
     516           0 :             sRet = "zh";
     517           0 :             break;
     518             :         case USCRIPT_HANGUL:
     519           0 :             sRet = "ko";
     520           0 :             break;
     521             :         case USCRIPT_HEBREW:
     522           0 :             sRet = "hr";
     523           0 :             break;
     524             :         case USCRIPT_HIRAGANA:
     525           0 :             sRet = "ja";
     526           0 :             break;
     527             :         case USCRIPT_KANNADA:
     528           0 :             sRet = "kn";
     529           0 :             break;
     530             :         case USCRIPT_KATAKANA:
     531           0 :             sRet = "ja";
     532           0 :             break;
     533             :         case USCRIPT_KHMER:
     534           0 :             sRet = "km";
     535           0 :             break;
     536             :         case USCRIPT_LAO:
     537           0 :             sRet = "lo";
     538           0 :             break;
     539             :         case USCRIPT_LATIN:
     540           0 :             sRet = "en";
     541           0 :             break;
     542             :         case USCRIPT_MALAYALAM:
     543           0 :             sRet = "ml";
     544           0 :             break;
     545             :         case USCRIPT_MONGOLIAN:
     546           0 :             sRet = "mn";
     547           0 :             break;
     548             :         case USCRIPT_MYANMAR:
     549           0 :             sRet = "my";
     550           0 :             break;
     551             :         case USCRIPT_OGHAM:
     552           0 :             sRet = "pgl";
     553           0 :             break;
     554             :         case USCRIPT_OLD_ITALIC:
     555           0 :             sRet = "osc";
     556           0 :             break;
     557             :         case USCRIPT_ORIYA:
     558           0 :             sRet = "or";
     559           0 :             break;
     560             :         case USCRIPT_RUNIC:
     561           0 :             sRet = "ang";
     562           0 :             break;
     563             :         case USCRIPT_SINHALA:
     564           0 :             sRet = "si";
     565           0 :             break;
     566             :         case USCRIPT_SYRIAC:
     567           0 :             sRet = "syr";
     568           0 :             break;
     569             :         case USCRIPT_TAMIL:
     570           0 :             sRet = "ta";
     571           0 :             break;
     572             :         case USCRIPT_TELUGU:
     573           0 :             sRet = "te";
     574           0 :             break;
     575             :         case USCRIPT_THAANA:
     576           0 :             sRet = "dv";
     577           0 :             break;
     578             :         case USCRIPT_THAI:
     579           0 :             sRet = "th";
     580           0 :             break;
     581             :         case USCRIPT_TIBETAN:
     582           0 :             sRet = "bo";
     583           0 :             break;
     584             :         case USCRIPT_CANADIAN_ABORIGINAL:
     585           0 :             sRet = "iu";
     586           0 :             break;
     587             :         case USCRIPT_YI:
     588           0 :             sRet = "ii";
     589           0 :             break;
     590             :         case USCRIPT_TAGALOG:
     591           0 :             sRet = "tl";
     592           0 :             break;
     593             :         case USCRIPT_HANUNOO:
     594           0 :             sRet = "hnn";
     595           0 :             break;
     596             :         case USCRIPT_BUHID:
     597           0 :             sRet = "bku";
     598           0 :             break;
     599             :         case USCRIPT_TAGBANWA:
     600           0 :             sRet = "tbw";
     601           0 :             break;
     602             :         case USCRIPT_BRAILLE:
     603           0 :             sRet = "en";
     604           0 :             break;
     605             :         case USCRIPT_CYPRIOT:
     606           0 :             sRet = "ecy";
     607           0 :             break;
     608             :         case USCRIPT_LIMBU:
     609           0 :             sRet = "lif";
     610           0 :             break;
     611             :         case USCRIPT_LINEAR_B:
     612           0 :             sRet = "gmy";
     613           0 :             break;
     614             :         case USCRIPT_OSMANYA:
     615           0 :             sRet = "so";
     616           0 :             break;
     617             :         case USCRIPT_SHAVIAN:
     618           0 :             sRet = "en";
     619           0 :             break;
     620             :         case USCRIPT_TAI_LE:
     621           0 :             sRet = "tdd";
     622           0 :             break;
     623             :         case USCRIPT_UGARITIC:
     624           0 :             sRet = "uga";
     625           0 :             break;
     626             :         case USCRIPT_KATAKANA_OR_HIRAGANA:
     627           0 :             sRet = "ja";
     628           0 :             break;
     629             :         case USCRIPT_BUGINESE:
     630           0 :             sRet = "bug";
     631           0 :             break;
     632             :         case USCRIPT_GLAGOLITIC:
     633           0 :             sRet = "ch";
     634           0 :             break;
     635             :         case USCRIPT_KHAROSHTHI:
     636           0 :             sRet = "pra";
     637           0 :             break;
     638             :         case USCRIPT_SYLOTI_NAGRI:
     639           0 :             sRet = "syl";
     640           0 :             break;
     641             :         case USCRIPT_NEW_TAI_LUE:
     642           0 :             sRet = "khb";
     643           0 :             break;
     644             :         case USCRIPT_TIFINAGH:
     645           0 :             sRet = "tmh";
     646           0 :             break;
     647             :         case USCRIPT_OLD_PERSIAN:
     648           0 :             sRet = "peo";
     649           0 :             break;
     650             :         case USCRIPT_BALINESE:
     651           0 :             sRet = "ban";
     652           0 :             break;
     653             :         case USCRIPT_BATAK:
     654           0 :             sRet = "btk";
     655           0 :             break;
     656             :         case USCRIPT_BLISSYMBOLS:
     657           0 :             sRet = "en";
     658           0 :             break;
     659             :         case USCRIPT_BRAHMI:
     660           0 :             sRet = "pra";
     661           0 :             break;
     662             :         case USCRIPT_CHAM:
     663           0 :             sRet = "cja";
     664           0 :             break;
     665             :         case USCRIPT_CIRTH:
     666           0 :             sRet = "sjn";
     667           0 :             break;
     668             :         case USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC:
     669           0 :             sRet = "cu";
     670           0 :             break;
     671             :         case USCRIPT_DEMOTIC_EGYPTIAN:
     672             :         case USCRIPT_HIERATIC_EGYPTIAN:
     673             :         case USCRIPT_EGYPTIAN_HIEROGLYPHS:
     674           0 :             sRet = "egy";
     675           0 :             break;
     676             :         case USCRIPT_KHUTSURI:
     677           0 :             sRet = "ka";
     678           0 :             break;
     679             :         case USCRIPT_SIMPLIFIED_HAN:
     680           0 :             sRet = "zh";
     681           0 :             break;
     682             :         case USCRIPT_TRADITIONAL_HAN:
     683           0 :             sRet = "zh";
     684           0 :             break;
     685             :         case USCRIPT_PAHAWH_HMONG:
     686           0 :             sRet = "blu";
     687           0 :             break;
     688             :         case USCRIPT_OLD_HUNGARIAN:
     689           0 :             sRet = "ohu";
     690           0 :             break;
     691             :         case USCRIPT_HARAPPAN_INDUS:
     692           0 :             sRet = "xiv";
     693           0 :             break;
     694             :         case USCRIPT_JAVANESE:
     695           0 :             sRet = "kaw";
     696           0 :             break;
     697             :         case USCRIPT_KAYAH_LI:
     698           0 :             sRet = "eky";
     699           0 :             break;
     700             :         case USCRIPT_LATIN_FRAKTUR:
     701           0 :             sRet = "de";
     702           0 :             break;
     703             :         case USCRIPT_LATIN_GAELIC:
     704           0 :             sRet = "ga";
     705           0 :             break;
     706             :         case USCRIPT_LEPCHA:
     707           0 :             sRet = "lep";
     708           0 :             break;
     709             :         case USCRIPT_LINEAR_A:
     710           0 :             sRet = "ecr";
     711           0 :             break;
     712             :         case USCRIPT_MAYAN_HIEROGLYPHS:
     713           0 :             sRet = "myn";
     714           0 :             break;
     715             :         case USCRIPT_MEROITIC:
     716           0 :             sRet = "xmr";
     717           0 :             break;
     718             :         case USCRIPT_NKO:
     719           0 :             sRet = "nqo";
     720           0 :             break;
     721             :         case USCRIPT_ORKHON:
     722           0 :             sRet = "otk";
     723           0 :             break;
     724             :         case USCRIPT_OLD_PERMIC:
     725           0 :             sRet = "kv";
     726           0 :             break;
     727             :         case USCRIPT_PHAGS_PA:
     728           0 :             sRet = "xng";
     729           0 :             break;
     730             :         case USCRIPT_PHOENICIAN:
     731           0 :             sRet = "phn";
     732           0 :             break;
     733             :         case USCRIPT_PHONETIC_POLLARD:
     734           0 :             sRet = "hmd";
     735           0 :             break;
     736             :         case USCRIPT_RONGORONGO:
     737           0 :             sRet = "rap";
     738           0 :             break;
     739             :         case USCRIPT_SARATI:
     740           0 :             sRet = "qya";
     741           0 :             break;
     742             :         case USCRIPT_ESTRANGELO_SYRIAC:
     743           0 :             sRet = "syr";
     744           0 :             break;
     745             :         case USCRIPT_WESTERN_SYRIAC:
     746           0 :             sRet = "tru";
     747           0 :             break;
     748             :         case USCRIPT_EASTERN_SYRIAC:
     749           0 :             sRet = "aii";
     750           0 :             break;
     751             :         case USCRIPT_TENGWAR:
     752           0 :             sRet = "sjn";
     753           0 :             break;
     754             :         case USCRIPT_VAI:
     755           0 :             sRet = "vai";
     756           0 :             break;
     757             :         case USCRIPT_VISIBLE_SPEECH:
     758           0 :             sRet = "en";
     759           0 :             break;
     760             :         case USCRIPT_CUNEIFORM:
     761           0 :             sRet = "akk";
     762           0 :             break;
     763             :         case USCRIPT_CARIAN:
     764           0 :             sRet = "xcr";
     765           0 :             break;
     766             :         case USCRIPT_JAPANESE:
     767           0 :             sRet = "ja";
     768           0 :             break;
     769             :         case USCRIPT_LANNA:
     770           0 :             sRet = "nod";
     771           0 :             break;
     772             :         case USCRIPT_LYCIAN:
     773           0 :             sRet = "xlc";
     774           0 :             break;
     775             :         case USCRIPT_LYDIAN:
     776           0 :             sRet = "xld";
     777           0 :             break;
     778             :         case USCRIPT_OL_CHIKI:
     779           0 :             sRet = "sat";
     780           0 :             break;
     781             :         case USCRIPT_REJANG:
     782           0 :             sRet = "rej";
     783           0 :             break;
     784             :         case USCRIPT_SAURASHTRA:
     785           0 :             sRet = "saz";
     786           0 :             break;
     787             :         case USCRIPT_SIGN_WRITING:
     788           0 :             sRet = "en";
     789           0 :             break;
     790             :         case USCRIPT_SUNDANESE:
     791           0 :             sRet = "su";
     792           0 :             break;
     793             :         case USCRIPT_MOON:
     794           0 :             sRet = "en";
     795           0 :             break;
     796             :         case USCRIPT_MEITEI_MAYEK:
     797           0 :             sRet = "mni";
     798           0 :             break;
     799             :         case USCRIPT_IMPERIAL_ARAMAIC:
     800           0 :             sRet = "arc";
     801           0 :             break;
     802             :         case USCRIPT_AVESTAN:
     803           0 :             sRet = "ae";
     804           0 :             break;
     805             :         case USCRIPT_CHAKMA:
     806           0 :             sRet = "ccp";
     807           0 :             break;
     808             :         case USCRIPT_KOREAN:
     809           0 :             sRet = "ko";
     810           0 :             break;
     811             :         case USCRIPT_KAITHI:
     812           0 :             sRet = "awa";
     813           0 :             break;
     814             :         case USCRIPT_MANICHAEAN:
     815           0 :             sRet = "xmn";
     816           0 :             break;
     817             :         case USCRIPT_INSCRIPTIONAL_PAHLAVI:
     818             :         case USCRIPT_PSALTER_PAHLAVI:
     819             :         case USCRIPT_BOOK_PAHLAVI:
     820             :         case USCRIPT_INSCRIPTIONAL_PARTHIAN:
     821           0 :             sRet = "xpr";
     822           0 :             break;
     823             :         case USCRIPT_SAMARITAN:
     824           0 :             sRet = "heb";
     825           0 :             break;
     826             :         case USCRIPT_TAI_VIET:
     827           0 :             sRet = "blt";
     828           0 :             break;
     829             :         case USCRIPT_MANDAEAN: /* Aliased to USCRIPT_MANDAIC in icu 4.6. */
     830           0 :             sRet = "mic";
     831           0 :             break;
     832             : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 4)
     833             :         case USCRIPT_NABATAEAN: //no language with an assigned code yet
     834           0 :             sRet = "mis";
     835           0 :             break;
     836             :         case USCRIPT_PALMYRENE: //no language with an assigned code yet
     837           0 :             sRet = "mis";
     838           0 :             break;
     839             :         case USCRIPT_BAMUM:
     840           0 :             sRet = "bax";
     841           0 :             break;
     842             :         case USCRIPT_LISU:
     843           0 :             sRet = "lis";
     844           0 :             break;
     845             :         case USCRIPT_NAKHI_GEBA:
     846           0 :             sRet = "nxq";
     847           0 :             break;
     848             :         case USCRIPT_OLD_SOUTH_ARABIAN:
     849           0 :             sRet = "xsa";
     850           0 :             break;
     851             :         case USCRIPT_BASSA_VAH:
     852           0 :             sRet = "bsq";
     853           0 :             break;
     854             :         case USCRIPT_DUPLOYAN_SHORTAND:
     855           0 :             sRet = "fr";
     856           0 :             break;
     857             :         case USCRIPT_ELBASAN:
     858           0 :             sRet = "sq";
     859           0 :             break;
     860             :         case USCRIPT_GRANTHA:
     861           0 :             sRet = "ta";
     862           0 :             break;
     863             :         case USCRIPT_KPELLE:
     864           0 :             sRet = "kpe";
     865           0 :             break;
     866             :         case USCRIPT_LOMA:
     867           0 :             sRet = "lom";
     868           0 :             break;
     869             :         case USCRIPT_MENDE:
     870           0 :             sRet = "men";
     871           0 :             break;
     872             :         case USCRIPT_MEROITIC_CURSIVE:
     873           0 :             sRet = "xmr";
     874           0 :             break;
     875             :         case USCRIPT_OLD_NORTH_ARABIAN:
     876           0 :             sRet = "xna";
     877           0 :             break;
     878             :         case USCRIPT_SINDHI:
     879           0 :             sRet = "sd";
     880           0 :             break;
     881             :         case USCRIPT_WARANG_CITI:
     882           0 :             sRet = "hoc";
     883           0 :             break;
     884             : #endif
     885             : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 8)
     886             :         case USCRIPT_AFAKA:
     887           0 :             sRet = "djk";
     888           0 :             break;
     889             :         case USCRIPT_JURCHEN:
     890           0 :             sRet = "juc";
     891           0 :             break;
     892             :         case USCRIPT_MRO:
     893           0 :             sRet = "cmr";
     894           0 :             break;
     895             :         case USCRIPT_NUSHU: //no language with an assigned code yet
     896           0 :             sRet = "mis";
     897           0 :             break;
     898             :         case USCRIPT_SHARADA:
     899           0 :             sRet = "sa";
     900           0 :             break;
     901             :         case USCRIPT_SORA_SOMPENG:
     902           0 :             sRet = "srb";
     903           0 :             break;
     904             :         case USCRIPT_TAKRI:
     905           0 :             sRet = "doi";
     906           0 :             break;
     907             :         case USCRIPT_TANGUT:
     908           0 :             sRet = "txg";
     909           0 :             break;
     910             :         case USCRIPT_WOLEAI:
     911           0 :             sRet = "woe";
     912           0 :             break;
     913             : #endif
     914             : #if (U_ICU_VERSION_MAJOR_NUM > 4)
     915             :         case USCRIPT_ANATOLIAN_HIEROGLYPHS:
     916           0 :             sRet = "hlu";
     917           0 :             break;
     918             :         case USCRIPT_KHOJKI:
     919           0 :             sRet = "gu";
     920           0 :             break;
     921             :         case USCRIPT_TIRHUTA:
     922           0 :             sRet = "mai";
     923           0 :             break;
     924             : #endif
     925             :     }
     926           0 :     return sRet;
     927             : }
     928             : 
     929             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10