Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/i18n/UnicodeType.hpp>
21 : #include <com/sun/star/i18n/KCharacterType.hpp>
22 : #include <com/sun/star/i18n/ScriptType.hpp>
23 : #include <i18nutil/unicode.hxx>
24 : #include "unicode_data.h"
25 :
26 : // Workaround for glibc braindamage:
27 : // glibc 2.4's langinfo.h does "#define CURRENCY_SYMBOL __CURRENCY_SYMBOL"
28 : // which (obviously) breaks UnicodeType::CURRENCY_SYMBOL
29 : #undef CURRENCY_SYMBOL
30 :
31 : using namespace ::com::sun::star::i18n;
32 :
33 : static ScriptTypeList defaultTypeList[] = {
34 : { UnicodeScript_kBasicLatin,
35 : UnicodeScript_kBasicLatin,
36 : UnicodeScript_kBasicLatin }, // 0,
37 : { UnicodeScript_kLatin1Supplement,
38 : UnicodeScript_kLatin1Supplement,
39 : UnicodeScript_kLatin1Supplement },// 1,
40 : { UnicodeScript_kLatinExtendedA,
41 : UnicodeScript_kLatinExtendedA,
42 : UnicodeScript_kLatinExtendedA }, // 2,
43 : { UnicodeScript_kLatinExtendedB,
44 : UnicodeScript_kLatinExtendedB,
45 : UnicodeScript_kLatinExtendedB }, // 3,
46 : { UnicodeScript_kIPAExtension,
47 : UnicodeScript_kIPAExtension,
48 : UnicodeScript_kIPAExtension }, // 4,
49 : { UnicodeScript_kSpacingModifier,
50 : UnicodeScript_kSpacingModifier,
51 : UnicodeScript_kSpacingModifier }, // 5,
52 : { UnicodeScript_kCombiningDiacritical,
53 : UnicodeScript_kCombiningDiacritical,
54 : UnicodeScript_kCombiningDiacritical }, // 6,
55 : { UnicodeScript_kGreek,
56 : UnicodeScript_kGreek,
57 : UnicodeScript_kGreek }, // 7,
58 : { UnicodeScript_kCyrillic,
59 : UnicodeScript_kCyrillic,
60 : UnicodeScript_kCyrillic }, // 8,
61 : { UnicodeScript_kArmenian,
62 : UnicodeScript_kArmenian,
63 : UnicodeScript_kArmenian }, // 9,
64 : { UnicodeScript_kHebrew,
65 : UnicodeScript_kHebrew,
66 : UnicodeScript_kHebrew }, // 10,
67 : { UnicodeScript_kArabic,
68 : UnicodeScript_kArabic,
69 : UnicodeScript_kArabic }, // 11,
70 : { UnicodeScript_kSyriac,
71 : UnicodeScript_kSyriac,
72 : UnicodeScript_kSyriac }, // 12,
73 : { UnicodeScript_kThaana,
74 : UnicodeScript_kThaana,
75 : UnicodeScript_kThaana }, // 13,
76 : { UnicodeScript_kDevanagari,
77 : UnicodeScript_kDevanagari,
78 : UnicodeScript_kDevanagari }, // 14,
79 : { UnicodeScript_kBengali,
80 : UnicodeScript_kBengali,
81 : UnicodeScript_kBengali }, // 15,
82 : { UnicodeScript_kGurmukhi,
83 : UnicodeScript_kGurmukhi,
84 : UnicodeScript_kGurmukhi }, // 16,
85 : { UnicodeScript_kGujarati,
86 : UnicodeScript_kGujarati,
87 : UnicodeScript_kGujarati }, // 17,
88 : { UnicodeScript_kOriya,
89 : UnicodeScript_kOriya,
90 : UnicodeScript_kOriya }, // 18,
91 : { UnicodeScript_kTamil,
92 : UnicodeScript_kTamil,
93 : UnicodeScript_kTamil }, // 19,
94 : { UnicodeScript_kTelugu,
95 : UnicodeScript_kTelugu,
96 : UnicodeScript_kTelugu }, // 20,
97 : { UnicodeScript_kKannada,
98 : UnicodeScript_kKannada,
99 : UnicodeScript_kKannada }, // 21,
100 : { UnicodeScript_kMalayalam,
101 : UnicodeScript_kMalayalam,
102 : UnicodeScript_kMalayalam }, // 22,
103 : { UnicodeScript_kSinhala,
104 : UnicodeScript_kSinhala,
105 : UnicodeScript_kSinhala }, // 23,
106 : { UnicodeScript_kThai,
107 : UnicodeScript_kThai,
108 : UnicodeScript_kThai }, // 24,
109 : { UnicodeScript_kLao,
110 : UnicodeScript_kLao,
111 : UnicodeScript_kLao }, // 25,
112 : { UnicodeScript_kTibetan,
113 : UnicodeScript_kTibetan,
114 : UnicodeScript_kTibetan }, // 26,
115 : { UnicodeScript_kMyanmar,
116 : UnicodeScript_kMyanmar,
117 : UnicodeScript_kMyanmar }, // 27,
118 : { UnicodeScript_kGeorgian,
119 : UnicodeScript_kGeorgian,
120 : UnicodeScript_kGeorgian }, // 28,
121 : { UnicodeScript_kHangulJamo,
122 : UnicodeScript_kHangulJamo,
123 : UnicodeScript_kHangulJamo }, // 29,
124 : { UnicodeScript_kEthiopic,
125 : UnicodeScript_kEthiopic,
126 : UnicodeScript_kEthiopic }, // 30,
127 : { UnicodeScript_kCherokee,
128 : UnicodeScript_kCherokee,
129 : UnicodeScript_kCherokee }, // 31,
130 : { UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
131 : UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
132 : UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32,
133 : { UnicodeScript_kOgham,
134 : UnicodeScript_kOgham,
135 : UnicodeScript_kOgham }, // 33,
136 : { UnicodeScript_kRunic,
137 : UnicodeScript_kRunic,
138 : UnicodeScript_kRunic }, // 34,
139 : { UnicodeScript_kKhmer,
140 : UnicodeScript_kKhmer,
141 : UnicodeScript_kKhmer }, // 35,
142 : { UnicodeScript_kMongolian,
143 : UnicodeScript_kMongolian,
144 : UnicodeScript_kMongolian }, // 36,
145 : { UnicodeScript_kLatinExtendedAdditional,
146 : UnicodeScript_kLatinExtendedAdditional,
147 : UnicodeScript_kLatinExtendedAdditional }, // 37,
148 : { UnicodeScript_kGreekExtended,
149 : UnicodeScript_kGreekExtended,
150 : UnicodeScript_kGreekExtended }, // 38,
151 : { UnicodeScript_kGeneralPunctuation,
152 : UnicodeScript_kGeneralPunctuation,
153 : UnicodeScript_kGeneralPunctuation }, // 39,
154 : { UnicodeScript_kSuperSubScript,
155 : UnicodeScript_kSuperSubScript,
156 : UnicodeScript_kSuperSubScript }, // 40,
157 : { UnicodeScript_kCurrencySymbolScript,
158 : UnicodeScript_kCurrencySymbolScript,
159 : UnicodeScript_kCurrencySymbolScript }, // 41,
160 : { UnicodeScript_kSymbolCombiningMark,
161 : UnicodeScript_kSymbolCombiningMark,
162 : UnicodeScript_kSymbolCombiningMark }, // 42,
163 : { UnicodeScript_kLetterlikeSymbol,
164 : UnicodeScript_kLetterlikeSymbol,
165 : UnicodeScript_kLetterlikeSymbol }, // 43,
166 : { UnicodeScript_kNumberForm,
167 : UnicodeScript_kNumberForm,
168 : UnicodeScript_kNumberForm }, // 44,
169 : { UnicodeScript_kArrow,
170 : UnicodeScript_kArrow,
171 : UnicodeScript_kArrow }, // 45,
172 : { UnicodeScript_kMathOperator,
173 : UnicodeScript_kMathOperator,
174 : UnicodeScript_kMathOperator }, // 46,
175 : { UnicodeScript_kMiscTechnical,
176 : UnicodeScript_kMiscTechnical,
177 : UnicodeScript_kMiscTechnical }, // 47,
178 : { UnicodeScript_kControlPicture,
179 : UnicodeScript_kControlPicture,
180 : UnicodeScript_kControlPicture }, // 48,
181 : { UnicodeScript_kOpticalCharacter,
182 : UnicodeScript_kOpticalCharacter,
183 : UnicodeScript_kOpticalCharacter }, // 49,
184 : { UnicodeScript_kEnclosedAlphanumeric,
185 : UnicodeScript_kEnclosedAlphanumeric,
186 : UnicodeScript_kEnclosedAlphanumeric }, // 50,
187 : { UnicodeScript_kBoxDrawing,
188 : UnicodeScript_kBoxDrawing,
189 : UnicodeScript_kBoxDrawing }, // 51,
190 : { UnicodeScript_kBlockElement,
191 : UnicodeScript_kBlockElement,
192 : UnicodeScript_kBlockElement }, // 52,
193 : { UnicodeScript_kGeometricShape,
194 : UnicodeScript_kGeometricShape,
195 : UnicodeScript_kGeometricShape }, // 53,
196 : { UnicodeScript_kMiscSymbol,
197 : UnicodeScript_kMiscSymbol,
198 : UnicodeScript_kMiscSymbol }, // 54,
199 : { UnicodeScript_kDingbat,
200 : UnicodeScript_kDingbat,
201 : UnicodeScript_kDingbat }, // 55,
202 : { UnicodeScript_kBraillePatterns,
203 : UnicodeScript_kBraillePatterns,
204 : UnicodeScript_kBraillePatterns }, // 56,
205 : { UnicodeScript_kCJKRadicalsSupplement,
206 : UnicodeScript_kCJKRadicalsSupplement,
207 : UnicodeScript_kCJKRadicalsSupplement }, // 57,
208 : { UnicodeScript_kKangxiRadicals,
209 : UnicodeScript_kKangxiRadicals,
210 : UnicodeScript_kKangxiRadicals }, // 58,
211 : { UnicodeScript_kIdeographicDescriptionCharacters,
212 : UnicodeScript_kIdeographicDescriptionCharacters,
213 : UnicodeScript_kIdeographicDescriptionCharacters }, // 59,
214 : { UnicodeScript_kCJKSymbolPunctuation,
215 : UnicodeScript_kCJKSymbolPunctuation,
216 : UnicodeScript_kCJKSymbolPunctuation }, // 60,
217 : { UnicodeScript_kHiragana,
218 : UnicodeScript_kHiragana,
219 : UnicodeScript_kHiragana }, // 61,
220 : { UnicodeScript_kKatakana,
221 : UnicodeScript_kKatakana,
222 : UnicodeScript_kKatakana }, // 62,
223 : { UnicodeScript_kBopomofo,
224 : UnicodeScript_kBopomofo,
225 : UnicodeScript_kBopomofo }, // 63,
226 : { UnicodeScript_kHangulCompatibilityJamo,
227 : UnicodeScript_kHangulCompatibilityJamo,
228 : UnicodeScript_kHangulCompatibilityJamo }, // 64,
229 : { UnicodeScript_kKanbun,
230 : UnicodeScript_kKanbun,
231 : UnicodeScript_kKanbun }, // 65,
232 : { UnicodeScript_kBopomofoExtended,
233 : UnicodeScript_kBopomofoExtended,
234 : UnicodeScript_kBopomofoExtended }, // 66,
235 : { UnicodeScript_kEnclosedCJKLetterMonth,
236 : UnicodeScript_kEnclosedCJKLetterMonth,
237 : UnicodeScript_kEnclosedCJKLetterMonth }, // 67,
238 : { UnicodeScript_kCJKCompatibility,
239 : UnicodeScript_kCJKCompatibility,
240 : UnicodeScript_kCJKCompatibility }, // 68,
241 : { UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
242 : UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
243 : UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69,
244 : { UnicodeScript_kCJKUnifiedIdeograph,
245 : UnicodeScript_kCJKUnifiedIdeograph,
246 : UnicodeScript_kCJKUnifiedIdeograph }, // 70,
247 : { UnicodeScript_kYiSyllables,
248 : UnicodeScript_kYiSyllables,
249 : UnicodeScript_kYiSyllables }, // 71,
250 : { UnicodeScript_kYiRadicals,
251 : UnicodeScript_kYiRadicals,
252 : UnicodeScript_kYiRadicals }, // 72,
253 : { UnicodeScript_kHangulSyllable,
254 : UnicodeScript_kHangulSyllable,
255 : UnicodeScript_kHangulSyllable }, // 73,
256 : { UnicodeScript_kHighSurrogate,
257 : UnicodeScript_kHighSurrogate,
258 : UnicodeScript_kHighSurrogate }, // 74,
259 : { UnicodeScript_kHighPrivateUseSurrogate,
260 : UnicodeScript_kHighPrivateUseSurrogate,
261 : UnicodeScript_kHighPrivateUseSurrogate }, // 75,
262 : { UnicodeScript_kLowSurrogate,
263 : UnicodeScript_kLowSurrogate,
264 : UnicodeScript_kLowSurrogate }, // 76,
265 : { UnicodeScript_kPrivateUse,
266 : UnicodeScript_kPrivateUse,
267 : UnicodeScript_kPrivateUse }, // 77,
268 : { UnicodeScript_kCJKCompatibilityIdeograph,
269 : UnicodeScript_kCJKCompatibilityIdeograph,
270 : UnicodeScript_kCJKCompatibilityIdeograph }, // 78,
271 : { UnicodeScript_kAlphabeticPresentation,
272 : UnicodeScript_kAlphabeticPresentation,
273 : UnicodeScript_kAlphabeticPresentation }, // 79,
274 : { UnicodeScript_kArabicPresentationA,
275 : UnicodeScript_kArabicPresentationA,
276 : UnicodeScript_kArabicPresentationA }, // 80,
277 : { UnicodeScript_kCombiningHalfMark,
278 : UnicodeScript_kCombiningHalfMark,
279 : UnicodeScript_kCombiningHalfMark }, // 81,
280 : { UnicodeScript_kCJKCompatibilityForm,
281 : UnicodeScript_kCJKCompatibilityForm,
282 : UnicodeScript_kCJKCompatibilityForm }, // 82,
283 : { UnicodeScript_kSmallFormVariant,
284 : UnicodeScript_kSmallFormVariant,
285 : UnicodeScript_kSmallFormVariant }, // 83,
286 : { UnicodeScript_kArabicPresentationB,
287 : UnicodeScript_kArabicPresentationB,
288 : UnicodeScript_kArabicPresentationB }, // 84,
289 : { UnicodeScript_kNoScript,
290 : UnicodeScript_kNoScript,
291 : UnicodeScript_kNoScript }, // 85,
292 : { UnicodeScript_kHalfwidthFullwidthForm,
293 : UnicodeScript_kHalfwidthFullwidthForm,
294 : UnicodeScript_kHalfwidthFullwidthForm }, // 86,
295 : { UnicodeScript_kScriptCount,
296 : UnicodeScript_kScriptCount,
297 : UnicodeScript_kNoScript } // 87,
298 : };
299 :
300 : sal_Int16 SAL_CALL
301 18 : unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) {
302 :
303 18 : if (!typeList) {
304 0 : typeList = defaultTypeList;
305 0 : unknownType = UnicodeScript_kNoScript;
306 : }
307 :
308 18 : sal_Int16 i = 0, type = typeList[0].to;
309 36 : while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) {
310 0 : type = typeList[++i].to;
311 : }
312 :
313 18 : return (type < UnicodeScript_kScriptCount &&
314 18 : ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ?
315 30 : typeList[i].value : unknownType;
316 : }
317 :
318 : sal_Unicode SAL_CALL
319 20 : unicode::getUnicodeScriptStart( UnicodeScript type) {
320 20 : return UnicodeScriptType[type][UnicodeScriptTypeFrom];
321 : }
322 :
323 : sal_Unicode SAL_CALL
324 20 : unicode::getUnicodeScriptEnd( UnicodeScript type) {
325 20 : return UnicodeScriptType[type][UnicodeScriptTypeTo];
326 : }
327 :
328 : sal_Int16 SAL_CALL
329 10924 : unicode::getUnicodeType( const sal_Unicode ch ) {
330 : static sal_Unicode c = 0x00;
331 : static sal_Int16 r = 0x00;
332 :
333 10924 : if (ch == c) return r;
334 3593 : else c = ch;
335 :
336 3593 : sal_Int16 address = UnicodeTypeIndex[ch >> 8];
337 0 : return r = (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] :
338 3593 : UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]);
339 : }
340 :
341 : sal_uInt8 SAL_CALL
342 0 : unicode::getUnicodeDirection( const sal_Unicode ch ) {
343 : static sal_Unicode c = 0x00;
344 : static sal_uInt8 r = 0x00;
345 :
346 0 : if (ch == c) return r;
347 0 : else c = ch;
348 :
349 0 : sal_Int16 address = UnicodeDirectionIndex[ch >> 8];
350 0 : return r = ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] :
351 0 : UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]);
352 :
353 : }
354 :
355 : #define bit(name) (1 << name)
356 :
357 : #define UPPERMASK bit(UnicodeType::UPPERCASE_LETTER)
358 :
359 : #define LOWERMASK bit(UnicodeType::LOWERCASE_LETTER)
360 :
361 : #define TITLEMASK bit(UnicodeType::TITLECASE_LETTER)
362 :
363 : #define ALPHAMASK UPPERMASK|LOWERMASK|TITLEMASK|\
364 : bit(UnicodeType::MODIFIER_LETTER)|\
365 : bit(UnicodeType::OTHER_LETTER)
366 :
367 : #define SPACEMASK bit(UnicodeType::SPACE_SEPARATOR)|\
368 : bit(UnicodeType::LINE_SEPARATOR)|\
369 : bit(UnicodeType::PARAGRAPH_SEPARATOR)
370 :
371 : #define CONTROLMASK bit(UnicodeType::CONTROL)|\
372 : bit(UnicodeType::FORMAT)|\
373 : bit(UnicodeType::LINE_SEPARATOR)|\
374 : bit(UnicodeType::PARAGRAPH_SEPARATOR)
375 :
376 : #define IsType(func, mask) \
377 : sal_Bool SAL_CALL func( const sal_Unicode ch) {\
378 : return (bit(getUnicodeType(ch)) & (mask)) != 0;\
379 : }
380 :
381 5052 : IsType(unicode::isControl, CONTROLMASK)
382 5820 : IsType(unicode::isAlpha, ALPHAMASK)
383 40 : IsType(unicode::isSpace, SPACEMASK)
384 :
385 : #define CONTROLSPACE bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
386 : bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)
387 :
388 40 : sal_Bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) {
389 40 : return (ch != 0xa0 && isSpace(ch)) || (ch <= 0x1F && (bit(ch) & (CONTROLSPACE)));
390 : }
391 :
392 7712 : sal_Int16 SAL_CALL unicode::getScriptClassFromUScriptCode(UScriptCode eScript)
393 : {
394 : //See unicode/uscript.h
395 : static sal_Int16 scriptTypes[] =
396 : {
397 : ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
398 : ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
399 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
400 : // 15
401 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
402 : ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
403 : ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
404 : // 30
405 : ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
406 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
407 : ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
408 : // 45
409 : ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
410 : ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
411 : ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
412 : // 60
413 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
414 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
415 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
416 : // 75
417 : ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
418 : ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
419 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
420 : // 90
421 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
422 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
423 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
424 : // 105
425 : ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
426 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
427 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
428 : // 120
429 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
430 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
431 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
432 : // 135
433 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
434 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
435 : ScriptType::COMPLEX,
436 : ScriptType::WEAK
437 : };
438 :
439 : sal_Int16 nRet;
440 7712 : if (eScript < USCRIPT_COMMON)
441 0 : nRet = ScriptType::WEAK;
442 7712 : else if (static_cast<size_t>(eScript) >= SAL_N_ELEMENTS(scriptTypes))
443 0 : nRet = ScriptType::COMPLEX; // anything new is going to be pretty wild
444 : else
445 7712 : nRet = scriptTypes[eScript];
446 7712 : return nRet;
447 : }
448 :
449 0 : OString SAL_CALL unicode::getExemplerLanguageForUScriptCode(UScriptCode eScript)
450 : {
451 0 : OString sRet;
452 0 : switch (eScript)
453 : {
454 : case USCRIPT_CODE_LIMIT:
455 : case USCRIPT_INVALID_CODE:
456 0 : sRet = "zxx";
457 0 : break;
458 : case USCRIPT_COMMON:
459 : case USCRIPT_INHERITED:
460 0 : sRet = "und";
461 0 : break;
462 : case USCRIPT_MATHEMATICAL_NOTATION:
463 : case USCRIPT_SYMBOLS:
464 0 : sRet = "zxx";
465 0 : break;
466 : case USCRIPT_UNWRITTEN_LANGUAGES:
467 : case USCRIPT_UNKNOWN:
468 0 : sRet = "und";
469 0 : break;
470 : case USCRIPT_ARABIC:
471 0 : sRet = "ar";
472 0 : break;
473 : case USCRIPT_ARMENIAN:
474 0 : sRet = "hy";
475 0 : break;
476 : case USCRIPT_BENGALI:
477 0 : sRet = "bn";
478 0 : break;
479 : case USCRIPT_BOPOMOFO:
480 0 : sRet = "zh";
481 0 : break;
482 : case USCRIPT_CHEROKEE:
483 0 : sRet = "chr";
484 0 : break;
485 : case USCRIPT_COPTIC:
486 0 : sRet = "cop";
487 0 : break;
488 : case USCRIPT_CYRILLIC:
489 0 : sRet = "ru";
490 0 : break;
491 : case USCRIPT_DESERET:
492 0 : sRet = "en";
493 0 : break;
494 : case USCRIPT_DEVANAGARI:
495 0 : sRet = "hi";
496 0 : break;
497 : case USCRIPT_ETHIOPIC:
498 0 : sRet = "am";
499 0 : break;
500 : case USCRIPT_GEORGIAN:
501 0 : sRet = "ka";
502 0 : break;
503 : case USCRIPT_GOTHIC:
504 0 : sRet = "got";
505 0 : break;
506 : case USCRIPT_GREEK:
507 0 : sRet = "el";
508 0 : break;
509 : case USCRIPT_GUJARATI:
510 0 : sRet = "gu";
511 0 : break;
512 : case USCRIPT_GURMUKHI:
513 0 : sRet = "pa";
514 0 : break;
515 : case USCRIPT_HAN:
516 0 : sRet = "zh";
517 0 : break;
518 : case USCRIPT_HANGUL:
519 0 : sRet = "ko";
520 0 : break;
521 : case USCRIPT_HEBREW:
522 0 : sRet = "hr";
523 0 : break;
524 : case USCRIPT_HIRAGANA:
525 0 : sRet = "ja";
526 0 : break;
527 : case USCRIPT_KANNADA:
528 0 : sRet = "kn";
529 0 : break;
530 : case USCRIPT_KATAKANA:
531 0 : sRet = "ja";
532 0 : break;
533 : case USCRIPT_KHMER:
534 0 : sRet = "km";
535 0 : break;
536 : case USCRIPT_LAO:
537 0 : sRet = "lo";
538 0 : break;
539 : case USCRIPT_LATIN:
540 0 : sRet = "en";
541 0 : break;
542 : case USCRIPT_MALAYALAM:
543 0 : sRet = "ml";
544 0 : break;
545 : case USCRIPT_MONGOLIAN:
546 0 : sRet = "mn";
547 0 : break;
548 : case USCRIPT_MYANMAR:
549 0 : sRet = "my";
550 0 : break;
551 : case USCRIPT_OGHAM:
552 0 : sRet = "pgl";
553 0 : break;
554 : case USCRIPT_OLD_ITALIC:
555 0 : sRet = "osc";
556 0 : break;
557 : case USCRIPT_ORIYA:
558 0 : sRet = "or";
559 0 : break;
560 : case USCRIPT_RUNIC:
561 0 : sRet = "ang";
562 0 : break;
563 : case USCRIPT_SINHALA:
564 0 : sRet = "si";
565 0 : break;
566 : case USCRIPT_SYRIAC:
567 0 : sRet = "syr";
568 0 : break;
569 : case USCRIPT_TAMIL:
570 0 : sRet = "ta";
571 0 : break;
572 : case USCRIPT_TELUGU:
573 0 : sRet = "te";
574 0 : break;
575 : case USCRIPT_THAANA:
576 0 : sRet = "dv";
577 0 : break;
578 : case USCRIPT_THAI:
579 0 : sRet = "th";
580 0 : break;
581 : case USCRIPT_TIBETAN:
582 0 : sRet = "bo";
583 0 : break;
584 : case USCRIPT_CANADIAN_ABORIGINAL:
585 0 : sRet = "iu";
586 0 : break;
587 : case USCRIPT_YI:
588 0 : sRet = "ii";
589 0 : break;
590 : case USCRIPT_TAGALOG:
591 0 : sRet = "tl";
592 0 : break;
593 : case USCRIPT_HANUNOO:
594 0 : sRet = "hnn";
595 0 : break;
596 : case USCRIPT_BUHID:
597 0 : sRet = "bku";
598 0 : break;
599 : case USCRIPT_TAGBANWA:
600 0 : sRet = "tbw";
601 0 : break;
602 : case USCRIPT_BRAILLE:
603 0 : sRet = "en";
604 0 : break;
605 : case USCRIPT_CYPRIOT:
606 0 : sRet = "ecy";
607 0 : break;
608 : case USCRIPT_LIMBU:
609 0 : sRet = "lif";
610 0 : break;
611 : case USCRIPT_LINEAR_B:
612 0 : sRet = "gmy";
613 0 : break;
614 : case USCRIPT_OSMANYA:
615 0 : sRet = "so";
616 0 : break;
617 : case USCRIPT_SHAVIAN:
618 0 : sRet = "en";
619 0 : break;
620 : case USCRIPT_TAI_LE:
621 0 : sRet = "tdd";
622 0 : break;
623 : case USCRIPT_UGARITIC:
624 0 : sRet = "uga";
625 0 : break;
626 : case USCRIPT_KATAKANA_OR_HIRAGANA:
627 0 : sRet = "ja";
628 0 : break;
629 : case USCRIPT_BUGINESE:
630 0 : sRet = "bug";
631 0 : break;
632 : case USCRIPT_GLAGOLITIC:
633 0 : sRet = "ch";
634 0 : break;
635 : case USCRIPT_KHAROSHTHI:
636 0 : sRet = "pra";
637 0 : break;
638 : case USCRIPT_SYLOTI_NAGRI:
639 0 : sRet = "syl";
640 0 : break;
641 : case USCRIPT_NEW_TAI_LUE:
642 0 : sRet = "khb";
643 0 : break;
644 : case USCRIPT_TIFINAGH:
645 0 : sRet = "tmh";
646 0 : break;
647 : case USCRIPT_OLD_PERSIAN:
648 0 : sRet = "peo";
649 0 : break;
650 : case USCRIPT_BALINESE:
651 0 : sRet = "ban";
652 0 : break;
653 : case USCRIPT_BATAK:
654 0 : sRet = "btk";
655 0 : break;
656 : case USCRIPT_BLISSYMBOLS:
657 0 : sRet = "en";
658 0 : break;
659 : case USCRIPT_BRAHMI:
660 0 : sRet = "pra";
661 0 : break;
662 : case USCRIPT_CHAM:
663 0 : sRet = "cja";
664 0 : break;
665 : case USCRIPT_CIRTH:
666 0 : sRet = "sjn";
667 0 : break;
668 : case USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC:
669 0 : sRet = "cu";
670 0 : break;
671 : case USCRIPT_DEMOTIC_EGYPTIAN:
672 : case USCRIPT_HIERATIC_EGYPTIAN:
673 : case USCRIPT_EGYPTIAN_HIEROGLYPHS:
674 0 : sRet = "egy";
675 0 : break;
676 : case USCRIPT_KHUTSURI:
677 0 : sRet = "ka";
678 0 : break;
679 : case USCRIPT_SIMPLIFIED_HAN:
680 0 : sRet = "zh";
681 0 : break;
682 : case USCRIPT_TRADITIONAL_HAN:
683 0 : sRet = "zh";
684 0 : break;
685 : case USCRIPT_PAHAWH_HMONG:
686 0 : sRet = "blu";
687 0 : break;
688 : case USCRIPT_OLD_HUNGARIAN:
689 0 : sRet = "ohu";
690 0 : break;
691 : case USCRIPT_HARAPPAN_INDUS:
692 0 : sRet = "xiv";
693 0 : break;
694 : case USCRIPT_JAVANESE:
695 0 : sRet = "kaw";
696 0 : break;
697 : case USCRIPT_KAYAH_LI:
698 0 : sRet = "eky";
699 0 : break;
700 : case USCRIPT_LATIN_FRAKTUR:
701 0 : sRet = "de";
702 0 : break;
703 : case USCRIPT_LATIN_GAELIC:
704 0 : sRet = "ga";
705 0 : break;
706 : case USCRIPT_LEPCHA:
707 0 : sRet = "lep";
708 0 : break;
709 : case USCRIPT_LINEAR_A:
710 0 : sRet = "ecr";
711 0 : break;
712 : case USCRIPT_MAYAN_HIEROGLYPHS:
713 0 : sRet = "myn";
714 0 : break;
715 : case USCRIPT_MEROITIC:
716 0 : sRet = "xmr";
717 0 : break;
718 : case USCRIPT_NKO:
719 0 : sRet = "nqo";
720 0 : break;
721 : case USCRIPT_ORKHON:
722 0 : sRet = "otk";
723 0 : break;
724 : case USCRIPT_OLD_PERMIC:
725 0 : sRet = "kv";
726 0 : break;
727 : case USCRIPT_PHAGS_PA:
728 0 : sRet = "xng";
729 0 : break;
730 : case USCRIPT_PHOENICIAN:
731 0 : sRet = "phn";
732 0 : break;
733 : case USCRIPT_PHONETIC_POLLARD:
734 0 : sRet = "hmd";
735 0 : break;
736 : case USCRIPT_RONGORONGO:
737 0 : sRet = "rap";
738 0 : break;
739 : case USCRIPT_SARATI:
740 0 : sRet = "qya";
741 0 : break;
742 : case USCRIPT_ESTRANGELO_SYRIAC:
743 0 : sRet = "syr";
744 0 : break;
745 : case USCRIPT_WESTERN_SYRIAC:
746 0 : sRet = "tru";
747 0 : break;
748 : case USCRIPT_EASTERN_SYRIAC:
749 0 : sRet = "aii";
750 0 : break;
751 : case USCRIPT_TENGWAR:
752 0 : sRet = "sjn";
753 0 : break;
754 : case USCRIPT_VAI:
755 0 : sRet = "vai";
756 0 : break;
757 : case USCRIPT_VISIBLE_SPEECH:
758 0 : sRet = "en";
759 0 : break;
760 : case USCRIPT_CUNEIFORM:
761 0 : sRet = "akk";
762 0 : break;
763 : case USCRIPT_CARIAN:
764 0 : sRet = "xcr";
765 0 : break;
766 : case USCRIPT_JAPANESE:
767 0 : sRet = "ja";
768 0 : break;
769 : case USCRIPT_LANNA:
770 0 : sRet = "nod";
771 0 : break;
772 : case USCRIPT_LYCIAN:
773 0 : sRet = "xlc";
774 0 : break;
775 : case USCRIPT_LYDIAN:
776 0 : sRet = "xld";
777 0 : break;
778 : case USCRIPT_OL_CHIKI:
779 0 : sRet = "sat";
780 0 : break;
781 : case USCRIPT_REJANG:
782 0 : sRet = "rej";
783 0 : break;
784 : case USCRIPT_SAURASHTRA:
785 0 : sRet = "saz";
786 0 : break;
787 : case USCRIPT_SIGN_WRITING:
788 0 : sRet = "en";
789 0 : break;
790 : case USCRIPT_SUNDANESE:
791 0 : sRet = "su";
792 0 : break;
793 : case USCRIPT_MOON:
794 0 : sRet = "en";
795 0 : break;
796 : case USCRIPT_MEITEI_MAYEK:
797 0 : sRet = "mni";
798 0 : break;
799 : case USCRIPT_IMPERIAL_ARAMAIC:
800 0 : sRet = "arc";
801 0 : break;
802 : case USCRIPT_AVESTAN:
803 0 : sRet = "ae";
804 0 : break;
805 : case USCRIPT_CHAKMA:
806 0 : sRet = "ccp";
807 0 : break;
808 : case USCRIPT_KOREAN:
809 0 : sRet = "ko";
810 0 : break;
811 : case USCRIPT_KAITHI:
812 0 : sRet = "awa";
813 0 : break;
814 : case USCRIPT_MANICHAEAN:
815 0 : sRet = "xmn";
816 0 : break;
817 : case USCRIPT_INSCRIPTIONAL_PAHLAVI:
818 : case USCRIPT_PSALTER_PAHLAVI:
819 : case USCRIPT_BOOK_PAHLAVI:
820 : case USCRIPT_INSCRIPTIONAL_PARTHIAN:
821 0 : sRet = "xpr";
822 0 : break;
823 : case USCRIPT_SAMARITAN:
824 0 : sRet = "heb";
825 0 : break;
826 : case USCRIPT_TAI_VIET:
827 0 : sRet = "blt";
828 0 : break;
829 : case USCRIPT_MANDAEAN: /* Aliased to USCRIPT_MANDAIC in icu 4.6. */
830 0 : sRet = "mic";
831 0 : break;
832 : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 4)
833 : case USCRIPT_NABATAEAN: //no language with an assigned code yet
834 0 : sRet = "mis";
835 0 : break;
836 : case USCRIPT_PALMYRENE: //no language with an assigned code yet
837 0 : sRet = "mis";
838 0 : break;
839 : case USCRIPT_BAMUM:
840 0 : sRet = "bax";
841 0 : break;
842 : case USCRIPT_LISU:
843 0 : sRet = "lis";
844 0 : break;
845 : case USCRIPT_NAKHI_GEBA:
846 0 : sRet = "nxq";
847 0 : break;
848 : case USCRIPT_OLD_SOUTH_ARABIAN:
849 0 : sRet = "xsa";
850 0 : break;
851 : case USCRIPT_BASSA_VAH:
852 0 : sRet = "bsq";
853 0 : break;
854 : case USCRIPT_DUPLOYAN_SHORTAND:
855 0 : sRet = "fr";
856 0 : break;
857 : case USCRIPT_ELBASAN:
858 0 : sRet = "sq";
859 0 : break;
860 : case USCRIPT_GRANTHA:
861 0 : sRet = "ta";
862 0 : break;
863 : case USCRIPT_KPELLE:
864 0 : sRet = "kpe";
865 0 : break;
866 : case USCRIPT_LOMA:
867 0 : sRet = "lom";
868 0 : break;
869 : case USCRIPT_MENDE:
870 0 : sRet = "men";
871 0 : break;
872 : case USCRIPT_MEROITIC_CURSIVE:
873 0 : sRet = "xmr";
874 0 : break;
875 : case USCRIPT_OLD_NORTH_ARABIAN:
876 0 : sRet = "xna";
877 0 : break;
878 : case USCRIPT_SINDHI:
879 0 : sRet = "sd";
880 0 : break;
881 : case USCRIPT_WARANG_CITI:
882 0 : sRet = "hoc";
883 0 : break;
884 : #endif
885 : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 8)
886 : case USCRIPT_AFAKA:
887 0 : sRet = "djk";
888 0 : break;
889 : case USCRIPT_JURCHEN:
890 0 : sRet = "juc";
891 0 : break;
892 : case USCRIPT_MRO:
893 0 : sRet = "cmr";
894 0 : break;
895 : case USCRIPT_NUSHU: //no language with an assigned code yet
896 0 : sRet = "mis";
897 0 : break;
898 : case USCRIPT_SHARADA:
899 0 : sRet = "sa";
900 0 : break;
901 : case USCRIPT_SORA_SOMPENG:
902 0 : sRet = "srb";
903 0 : break;
904 : case USCRIPT_TAKRI:
905 0 : sRet = "doi";
906 0 : break;
907 : case USCRIPT_TANGUT:
908 0 : sRet = "txg";
909 0 : break;
910 : case USCRIPT_WOLEAI:
911 0 : sRet = "woe";
912 0 : break;
913 : #endif
914 : #if (U_ICU_VERSION_MAJOR_NUM > 4)
915 : case USCRIPT_ANATOLIAN_HIEROGLYPHS:
916 0 : sRet = "hlu";
917 0 : break;
918 : case USCRIPT_KHOJKI:
919 0 : sRet = "gu";
920 0 : break;
921 : case USCRIPT_TIRHUTA:
922 0 : sRet = "mai";
923 0 : break;
924 : #endif
925 : }
926 0 : return sRet;
927 : }
928 :
929 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|