Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <com/sun/star/i18n/UnicodeType.hpp>
21 : #include <com/sun/star/i18n/KCharacterType.hpp>
22 : #include <com/sun/star/i18n/ScriptType.hpp>
23 : #include <i18nutil/unicode.hxx>
24 : #include "unicode_data.h"
25 :
26 : // Workaround for glibc braindamage:
27 : // glibc 2.4's langinfo.h does "#define CURRENCY_SYMBOL __CURRENCY_SYMBOL"
28 : // which (obviously) breaks UnicodeType::CURRENCY_SYMBOL
29 : #undef CURRENCY_SYMBOL
30 :
31 : using namespace ::com::sun::star::i18n;
32 :
// Default range list used by unicode::getUnicodeScriptType() when the caller
// passes a null typeList. Each entry names one UnicodeScript_* block three
// times, so a character inside that block maps to the block itself.
// The trailing "// N," comments give the entry's index in the array.
// NOTE(review): ScriptTypeList is declared outside this file; the initializer
// order is presumably (from, to, value), matching the members read by
// getUnicodeScriptType() - confirm against the declaration.
// The last entry uses UnicodeScript_kScriptCount, which is the value at which
// the scan loop in getUnicodeScriptType() stops, so it acts as the terminator.
33 : static ScriptTypeList defaultTypeList[] = {
34 : { UnicodeScript_kBasicLatin,
35 : UnicodeScript_kBasicLatin,
36 : UnicodeScript_kBasicLatin }, // 0,
37 : { UnicodeScript_kLatin1Supplement,
38 : UnicodeScript_kLatin1Supplement,
39 : UnicodeScript_kLatin1Supplement },// 1,
40 : { UnicodeScript_kLatinExtendedA,
41 : UnicodeScript_kLatinExtendedA,
42 : UnicodeScript_kLatinExtendedA }, // 2,
43 : { UnicodeScript_kLatinExtendedB,
44 : UnicodeScript_kLatinExtendedB,
45 : UnicodeScript_kLatinExtendedB }, // 3,
46 : { UnicodeScript_kIPAExtension,
47 : UnicodeScript_kIPAExtension,
48 : UnicodeScript_kIPAExtension }, // 4,
49 : { UnicodeScript_kSpacingModifier,
50 : UnicodeScript_kSpacingModifier,
51 : UnicodeScript_kSpacingModifier }, // 5,
52 : { UnicodeScript_kCombiningDiacritical,
53 : UnicodeScript_kCombiningDiacritical,
54 : UnicodeScript_kCombiningDiacritical }, // 6,
55 : { UnicodeScript_kGreek,
56 : UnicodeScript_kGreek,
57 : UnicodeScript_kGreek }, // 7,
58 : { UnicodeScript_kCyrillic,
59 : UnicodeScript_kCyrillic,
60 : UnicodeScript_kCyrillic }, // 8,
61 : { UnicodeScript_kArmenian,
62 : UnicodeScript_kArmenian,
63 : UnicodeScript_kArmenian }, // 9,
64 : { UnicodeScript_kHebrew,
65 : UnicodeScript_kHebrew,
66 : UnicodeScript_kHebrew }, // 10,
67 : { UnicodeScript_kArabic,
68 : UnicodeScript_kArabic,
69 : UnicodeScript_kArabic }, // 11,
70 : { UnicodeScript_kSyriac,
71 : UnicodeScript_kSyriac,
72 : UnicodeScript_kSyriac }, // 12,
73 : { UnicodeScript_kThaana,
74 : UnicodeScript_kThaana,
75 : UnicodeScript_kThaana }, // 13,
76 : { UnicodeScript_kDevanagari,
77 : UnicodeScript_kDevanagari,
78 : UnicodeScript_kDevanagari }, // 14,
79 : { UnicodeScript_kBengali,
80 : UnicodeScript_kBengali,
81 : UnicodeScript_kBengali }, // 15,
82 : { UnicodeScript_kGurmukhi,
83 : UnicodeScript_kGurmukhi,
84 : UnicodeScript_kGurmukhi }, // 16,
85 : { UnicodeScript_kGujarati,
86 : UnicodeScript_kGujarati,
87 : UnicodeScript_kGujarati }, // 17,
88 : { UnicodeScript_kOriya,
89 : UnicodeScript_kOriya,
90 : UnicodeScript_kOriya }, // 18,
91 : { UnicodeScript_kTamil,
92 : UnicodeScript_kTamil,
93 : UnicodeScript_kTamil }, // 19,
94 : { UnicodeScript_kTelugu,
95 : UnicodeScript_kTelugu,
96 : UnicodeScript_kTelugu }, // 20,
97 : { UnicodeScript_kKannada,
98 : UnicodeScript_kKannada,
99 : UnicodeScript_kKannada }, // 21,
100 : { UnicodeScript_kMalayalam,
101 : UnicodeScript_kMalayalam,
102 : UnicodeScript_kMalayalam }, // 22,
103 : { UnicodeScript_kSinhala,
104 : UnicodeScript_kSinhala,
105 : UnicodeScript_kSinhala }, // 23,
106 : { UnicodeScript_kThai,
107 : UnicodeScript_kThai,
108 : UnicodeScript_kThai }, // 24,
109 : { UnicodeScript_kLao,
110 : UnicodeScript_kLao,
111 : UnicodeScript_kLao }, // 25,
112 : { UnicodeScript_kTibetan,
113 : UnicodeScript_kTibetan,
114 : UnicodeScript_kTibetan }, // 26,
115 : { UnicodeScript_kMyanmar,
116 : UnicodeScript_kMyanmar,
117 : UnicodeScript_kMyanmar }, // 27,
118 : { UnicodeScript_kGeorgian,
119 : UnicodeScript_kGeorgian,
120 : UnicodeScript_kGeorgian }, // 28,
121 : { UnicodeScript_kHangulJamo,
122 : UnicodeScript_kHangulJamo,
123 : UnicodeScript_kHangulJamo }, // 29,
124 : { UnicodeScript_kEthiopic,
125 : UnicodeScript_kEthiopic,
126 : UnicodeScript_kEthiopic }, // 30,
127 : { UnicodeScript_kCherokee,
128 : UnicodeScript_kCherokee,
129 : UnicodeScript_kCherokee }, // 31,
130 : { UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
131 : UnicodeScript_kUnifiedCanadianAboriginalSyllabics,
132 : UnicodeScript_kUnifiedCanadianAboriginalSyllabics }, // 32,
133 : { UnicodeScript_kOgham,
134 : UnicodeScript_kOgham,
135 : UnicodeScript_kOgham }, // 33,
136 : { UnicodeScript_kRunic,
137 : UnicodeScript_kRunic,
138 : UnicodeScript_kRunic }, // 34,
139 : { UnicodeScript_kKhmer,
140 : UnicodeScript_kKhmer,
141 : UnicodeScript_kKhmer }, // 35,
142 : { UnicodeScript_kMongolian,
143 : UnicodeScript_kMongolian,
144 : UnicodeScript_kMongolian }, // 36,
145 : { UnicodeScript_kLatinExtendedAdditional,
146 : UnicodeScript_kLatinExtendedAdditional,
147 : UnicodeScript_kLatinExtendedAdditional }, // 37,
148 : { UnicodeScript_kGreekExtended,
149 : UnicodeScript_kGreekExtended,
150 : UnicodeScript_kGreekExtended }, // 38,
151 : { UnicodeScript_kGeneralPunctuation,
152 : UnicodeScript_kGeneralPunctuation,
153 : UnicodeScript_kGeneralPunctuation }, // 39,
154 : { UnicodeScript_kSuperSubScript,
155 : UnicodeScript_kSuperSubScript,
156 : UnicodeScript_kSuperSubScript }, // 40,
157 : { UnicodeScript_kCurrencySymbolScript,
158 : UnicodeScript_kCurrencySymbolScript,
159 : UnicodeScript_kCurrencySymbolScript }, // 41,
160 : { UnicodeScript_kSymbolCombiningMark,
161 : UnicodeScript_kSymbolCombiningMark,
162 : UnicodeScript_kSymbolCombiningMark }, // 42,
163 : { UnicodeScript_kLetterlikeSymbol,
164 : UnicodeScript_kLetterlikeSymbol,
165 : UnicodeScript_kLetterlikeSymbol }, // 43,
166 : { UnicodeScript_kNumberForm,
167 : UnicodeScript_kNumberForm,
168 : UnicodeScript_kNumberForm }, // 44,
169 : { UnicodeScript_kArrow,
170 : UnicodeScript_kArrow,
171 : UnicodeScript_kArrow }, // 45,
172 : { UnicodeScript_kMathOperator,
173 : UnicodeScript_kMathOperator,
174 : UnicodeScript_kMathOperator }, // 46,
175 : { UnicodeScript_kMiscTechnical,
176 : UnicodeScript_kMiscTechnical,
177 : UnicodeScript_kMiscTechnical }, // 47,
178 : { UnicodeScript_kControlPicture,
179 : UnicodeScript_kControlPicture,
180 : UnicodeScript_kControlPicture }, // 48,
181 : { UnicodeScript_kOpticalCharacter,
182 : UnicodeScript_kOpticalCharacter,
183 : UnicodeScript_kOpticalCharacter }, // 49,
184 : { UnicodeScript_kEnclosedAlphanumeric,
185 : UnicodeScript_kEnclosedAlphanumeric,
186 : UnicodeScript_kEnclosedAlphanumeric }, // 50,
187 : { UnicodeScript_kBoxDrawing,
188 : UnicodeScript_kBoxDrawing,
189 : UnicodeScript_kBoxDrawing }, // 51,
190 : { UnicodeScript_kBlockElement,
191 : UnicodeScript_kBlockElement,
192 : UnicodeScript_kBlockElement }, // 52,
193 : { UnicodeScript_kGeometricShape,
194 : UnicodeScript_kGeometricShape,
195 : UnicodeScript_kGeometricShape }, // 53,
196 : { UnicodeScript_kMiscSymbol,
197 : UnicodeScript_kMiscSymbol,
198 : UnicodeScript_kMiscSymbol }, // 54,
199 : { UnicodeScript_kDingbat,
200 : UnicodeScript_kDingbat,
201 : UnicodeScript_kDingbat }, // 55,
202 : { UnicodeScript_kBraillePatterns,
203 : UnicodeScript_kBraillePatterns,
204 : UnicodeScript_kBraillePatterns }, // 56,
205 : { UnicodeScript_kCJKRadicalsSupplement,
206 : UnicodeScript_kCJKRadicalsSupplement,
207 : UnicodeScript_kCJKRadicalsSupplement }, // 57,
208 : { UnicodeScript_kKangxiRadicals,
209 : UnicodeScript_kKangxiRadicals,
210 : UnicodeScript_kKangxiRadicals }, // 58,
211 : { UnicodeScript_kIdeographicDescriptionCharacters,
212 : UnicodeScript_kIdeographicDescriptionCharacters,
213 : UnicodeScript_kIdeographicDescriptionCharacters }, // 59,
214 : { UnicodeScript_kCJKSymbolPunctuation,
215 : UnicodeScript_kCJKSymbolPunctuation,
216 : UnicodeScript_kCJKSymbolPunctuation }, // 60,
217 : { UnicodeScript_kHiragana,
218 : UnicodeScript_kHiragana,
219 : UnicodeScript_kHiragana }, // 61,
220 : { UnicodeScript_kKatakana,
221 : UnicodeScript_kKatakana,
222 : UnicodeScript_kKatakana }, // 62,
223 : { UnicodeScript_kBopomofo,
224 : UnicodeScript_kBopomofo,
225 : UnicodeScript_kBopomofo }, // 63,
226 : { UnicodeScript_kHangulCompatibilityJamo,
227 : UnicodeScript_kHangulCompatibilityJamo,
228 : UnicodeScript_kHangulCompatibilityJamo }, // 64,
229 : { UnicodeScript_kKanbun,
230 : UnicodeScript_kKanbun,
231 : UnicodeScript_kKanbun }, // 65,
232 : { UnicodeScript_kBopomofoExtended,
233 : UnicodeScript_kBopomofoExtended,
234 : UnicodeScript_kBopomofoExtended }, // 66,
235 : { UnicodeScript_kEnclosedCJKLetterMonth,
236 : UnicodeScript_kEnclosedCJKLetterMonth,
237 : UnicodeScript_kEnclosedCJKLetterMonth }, // 67,
238 : { UnicodeScript_kCJKCompatibility,
239 : UnicodeScript_kCJKCompatibility,
240 : UnicodeScript_kCJKCompatibility }, // 68,
241 : { UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
242 : UnicodeScript_k_CJKUnifiedIdeographsExtensionA,
243 : UnicodeScript_k_CJKUnifiedIdeographsExtensionA }, // 69,
244 : { UnicodeScript_kCJKUnifiedIdeograph,
245 : UnicodeScript_kCJKUnifiedIdeograph,
246 : UnicodeScript_kCJKUnifiedIdeograph }, // 70,
247 : { UnicodeScript_kYiSyllables,
248 : UnicodeScript_kYiSyllables,
249 : UnicodeScript_kYiSyllables }, // 71,
250 : { UnicodeScript_kYiRadicals,
251 : UnicodeScript_kYiRadicals,
252 : UnicodeScript_kYiRadicals }, // 72,
253 : { UnicodeScript_kHangulSyllable,
254 : UnicodeScript_kHangulSyllable,
255 : UnicodeScript_kHangulSyllable }, // 73,
256 : { UnicodeScript_kHighSurrogate,
257 : UnicodeScript_kHighSurrogate,
258 : UnicodeScript_kHighSurrogate }, // 74,
259 : { UnicodeScript_kHighPrivateUseSurrogate,
260 : UnicodeScript_kHighPrivateUseSurrogate,
261 : UnicodeScript_kHighPrivateUseSurrogate }, // 75,
262 : { UnicodeScript_kLowSurrogate,
263 : UnicodeScript_kLowSurrogate,
264 : UnicodeScript_kLowSurrogate }, // 76,
265 : { UnicodeScript_kPrivateUse,
266 : UnicodeScript_kPrivateUse,
267 : UnicodeScript_kPrivateUse }, // 77,
268 : { UnicodeScript_kCJKCompatibilityIdeograph,
269 : UnicodeScript_kCJKCompatibilityIdeograph,
270 : UnicodeScript_kCJKCompatibilityIdeograph }, // 78,
271 : { UnicodeScript_kAlphabeticPresentation,
272 : UnicodeScript_kAlphabeticPresentation,
273 : UnicodeScript_kAlphabeticPresentation }, // 79,
274 : { UnicodeScript_kArabicPresentationA,
275 : UnicodeScript_kArabicPresentationA,
276 : UnicodeScript_kArabicPresentationA }, // 80,
277 : { UnicodeScript_kCombiningHalfMark,
278 : UnicodeScript_kCombiningHalfMark,
279 : UnicodeScript_kCombiningHalfMark }, // 81,
280 : { UnicodeScript_kCJKCompatibilityForm,
281 : UnicodeScript_kCJKCompatibilityForm,
282 : UnicodeScript_kCJKCompatibilityForm }, // 82,
283 : { UnicodeScript_kSmallFormVariant,
284 : UnicodeScript_kSmallFormVariant,
285 : UnicodeScript_kSmallFormVariant }, // 83,
286 : { UnicodeScript_kArabicPresentationB,
287 : UnicodeScript_kArabicPresentationB,
288 : UnicodeScript_kArabicPresentationB }, // 84,
289 : { UnicodeScript_kNoScript,
290 : UnicodeScript_kNoScript,
291 : UnicodeScript_kNoScript }, // 85,
292 : { UnicodeScript_kHalfwidthFullwidthForm,
293 : UnicodeScript_kHalfwidthFullwidthForm,
294 : UnicodeScript_kHalfwidthFullwidthForm }, // 86,
295 : { UnicodeScript_kScriptCount,
296 : UnicodeScript_kScriptCount,
297 : UnicodeScript_kNoScript } // 87,
298 : };
299 :
// Classifies a UTF-16 code unit against a list of script ranges.
//
// ch          - code unit to classify.
// typeList    - array of entries with `from`, `to` and `value` members. When
//               null, defaultTypeList is used and unknownType is replaced by
//               UnicodeScript_kNoScript.
// unknownType - result when ch does not fall inside the entry the scan
//               stops on.
//
// Returns the `value` of the matching entry, otherwise unknownType.
//
// The scan has no length check. It stops only at an entry whose `to` is not
// below UnicodeScript_kScriptCount, or whose `to` block ends at or after ch,
// so a caller-supplied list needs such a terminating entry.
// NOTE(review): entries are presumably expected in ascending code-point
// order - confirm against callers.
300 : sal_Int16 SAL_CALL
301 25 : unicode::getUnicodeScriptType( const sal_Unicode ch, ScriptTypeList* typeList, sal_Int16 unknownType ) {
302 :
303 25 : if (!typeList) {
304 0 : typeList = defaultTypeList;
305 0 : unknownType = UnicodeScript_kNoScript;
306 : }
307 :
// Advance while ch lies beyond the end of the current entry's `to` block.
308 25 : sal_Int16 i = 0, type = typeList[0].to;
309 55 : while (type < UnicodeScript_kScriptCount && ch > UnicodeScriptType[type][UnicodeScriptTypeTo]) {
310 5 : type = typeList[++i].to;
311 : }
312 :
// ch is at or before the end of entry i; it matches only if it is also at or
// after the start of that entry's `from` block.
313 : return (type < UnicodeScript_kScriptCount &&
314 25 : ch >= UnicodeScriptType[typeList[i].from][UnicodeScriptTypeFrom]) ?
315 50 : typeList[i].value : unknownType;
316 : }
317 :
// Returns the UnicodeScriptTypeFrom column of the UnicodeScriptType table for
// the given script, i.e. the first code unit of that script's block.
// The index is used unchecked; `type` must be a valid row of the table.
318 : sal_Unicode SAL_CALL
319 0 : unicode::getUnicodeScriptStart( UnicodeScript type) {
320 0 : return UnicodeScriptType[type][UnicodeScriptTypeFrom];
321 : }
322 :
// Returns the UnicodeScriptTypeTo column of the UnicodeScriptType table for
// the given script, i.e. the last code unit of that script's block.
// The index is used unchecked; `type` must be a valid row of the table.
323 : sal_Unicode SAL_CALL
324 0 : unicode::getUnicodeScriptEnd( UnicodeScript type) {
325 0 : return UnicodeScriptType[type][UnicodeScriptTypeTo];
326 : }
327 :
// Returns the type value stored for a UTF-16 code unit in the
// UnicodeType* lookup tables.
//
// Lookup is two-level: UnicodeTypeIndex is indexed by the high byte of ch.
// If the resulting address is below UnicodeTypeNumberBlock, all 256 code
// units of that page share UnicodeTypeBlockValue[address]; otherwise the
// value is read per code unit from UnicodeTypeValue.
//
// The last (ch, result) pair is kept in function-local statics, so repeated
// queries for the same code unit skip the lookup.
// NOTE(review): the statics are read and written without synchronisation.
// NOTE(review): the cache starts as (c = 0, r = 0), so a call with ch == 0
// made before any other code unit returns 0 without consulting the tables -
// confirm that 0 is the table value for U+0000.
328 : sal_Int16 SAL_CALL
329 0 : unicode::getUnicodeType( const sal_Unicode ch ) {
330 : static sal_Unicode c = 0x00;
331 : static sal_Int16 r = 0x00;
332 :
333 0 : if (ch == c) return r;
334 0 : else c = ch;
335 :
336 0 : sal_Int16 address = UnicodeTypeIndex[ch >> 8];
337 0 : return r = (sal_Int16)((address < UnicodeTypeNumberBlock) ? UnicodeTypeBlockValue[address] :
338 0 : UnicodeTypeValue[((address - UnicodeTypeNumberBlock) << 8) + (ch & 0xff)]);
339 : }
340 :
// Returns the direction value stored for a UTF-16 code unit in the
// UnicodeDirection* lookup tables.
//
// Lookup is two-level: UnicodeDirectionIndex is indexed by the high byte of
// ch. If the resulting address is below UnicodeDirectionNumberBlock, the
// whole 256-code-unit page shares UnicodeDirectionBlockValue[address];
// otherwise the value is read per code unit from UnicodeDirectionValue.
//
// The last (ch, result) pair is kept in function-local statics, so repeated
// queries for the same code unit skip the lookup.
// NOTE(review): the statics are read and written without synchronisation.
// NOTE(review): the cache starts as (c = 0, r = 0), so a call with ch == 0
// made before any other code unit returns 0 without consulting the tables -
// confirm that 0 is the table value for U+0000.
341 : sal_uInt8 SAL_CALL
342 0 : unicode::getUnicodeDirection( const sal_Unicode ch ) {
343 : static sal_Unicode c = 0x00;
344 : static sal_uInt8 r = 0x00;
345 :
346 0 : if (ch == c) return r;
347 0 : else c = ch;
348 :
349 0 : sal_Int16 address = UnicodeDirectionIndex[ch >> 8];
350 0 : return r = ((address < UnicodeDirectionNumberBlock) ? UnicodeDirectionBlockValue[address] :
351 0 : UnicodeDirectionValue[((address - UnicodeDirectionNumberBlock) << 8) + (ch & 0xff)]);
352 :
353 : }
354 :
// Bit masks over UnicodeType category values, used by the predicates below.
// bit(name) yields an int with bit number `name` set; the argument is not
// parenthesised, so pass a simple expression.
// The multi-term masks expand to bare `a|b|c` expressions without enclosing
// parentheses, so every use must wrap the mask in parentheses itself.
355 : #define bit(name) (1 << name)
356 :
357 : #define UPPERMASK bit(UnicodeType::UPPERCASE_LETTER)
358 :
359 : #define LOWERMASK bit(UnicodeType::LOWERCASE_LETTER)
360 :
361 : #define TITLEMASK bit(UnicodeType::TITLECASE_LETTER)
362 :
// Decimal digits, letter numbers and other numbers.
363 : #define DIGITMASK bit(UnicodeType::DECIMAL_DIGIT_NUMBER)|\
364 : bit(UnicodeType::LETTER_NUMBER)|\
365 : bit(UnicodeType::OTHER_NUMBER)
366 :
// All letter categories: upper, lower, title, modifier and other.
367 : #define ALPHAMASK UPPERMASK|LOWERMASK|TITLEMASK|\
368 : bit(UnicodeType::MODIFIER_LETTER)|\
369 : bit(UnicodeType::OTHER_LETTER)
370 :
// Digits and letters plus the three mark categories.
371 : #define BASEMASK DIGITMASK|ALPHAMASK|\
372 : bit(UnicodeType::NON_SPACING_MARK)|\
373 : bit(UnicodeType::ENCLOSING_MARK)|\
374 : bit(UnicodeType::COMBINING_SPACING_MARK)
375 :
376 : #define SPACEMASK bit(UnicodeType::SPACE_SEPARATOR)|\
377 : bit(UnicodeType::LINE_SEPARATOR)|\
378 : bit(UnicodeType::PARAGRAPH_SEPARATOR)
379 :
380 : #define PUNCTUATIONMASK bit(UnicodeType::DASH_PUNCTUATION)|\
381 : bit(UnicodeType::INITIAL_PUNCTUATION)|\
382 : bit(UnicodeType::FINAL_PUNCTUATION)|\
383 : bit(UnicodeType::CONNECTOR_PUNCTUATION)|\
384 : bit(UnicodeType::OTHER_PUNCTUATION)
385 :
386 : #define SYMBOLMASK bit(UnicodeType::MATH_SYMBOL)|\
387 : bit(UnicodeType::CURRENCY_SYMBOL)|\
388 : bit(UnicodeType::MODIFIER_SYMBOL)|\
389 : bit(UnicodeType::OTHER_SYMBOL)
390 :
// Union of the base, space, punctuation and symbol masks.
391 : #define PRINTMASK BASEMASK|SPACEMASK|PUNCTUATIONMASK|SYMBOLMASK
392 :
// Line and paragraph separators appear here as well as in SPACEMASK.
393 : #define CONTROLMASK bit(UnicodeType::CONTROL)|\
394 : bit(UnicodeType::FORMAT)|\
395 : bit(UnicodeType::LINE_SEPARATOR)|\
396 : bit(UnicodeType::PARAGRAPH_SEPARATOR)
397 :
// IsType(func, mask) defines the predicate `func(ch)`: it returns true when
// the bit for getUnicodeType(ch) is set in `mask`. The mask is wrapped in
// parentheses here because the *MASK macros expand to bare OR-expressions.
398 : #define IsType(func, mask) \
399 : sal_Bool SAL_CALL func( const sal_Unicode ch) {\
400 : return (bit(getUnicodeType(ch)) & (mask)) != 0;\
401 : }
402 :
// Category predicates generated from the masks.
403 0 : IsType(unicode::isUpper, UPPERMASK)
404 0 : IsType(unicode::isLower, LOWERMASK)
405 0 : IsType(unicode::isControl, CONTROLMASK)
406 0 : IsType(unicode::isPrint, PRINTMASK)
407 0 : IsType(unicode::isAlpha, ALPHAMASK)
408 0 : IsType(unicode::isDigit, DIGITMASK)
409 0 : IsType(unicode::isAlphaDigit, ALPHAMASK|DIGITMASK)
410 0 : IsType(unicode::isSpace, SPACEMASK)
411 :
// Bit set of the code units 0x09-0x0d and 0x1c-0x1f, indexed by code unit
// value. Expands to a bare OR-expression; wrap it in parentheses when used.
412 : #define CONTROLSPACE bit(0x09)|bit(0x0a)|bit(0x0b)|bit(0x0c)|bit(0x0d)|\
413 : bit(0x1c)|bit(0x1d)|bit(0x1e)|bit(0x1f)
414 :
// Returns true when ch satisfies isSpace() and is not 0xa0, or when ch is
// one of the code units listed in CONTROLSPACE.
// The `ch <= 0x1F` test runs first, so bit(ch) never shifts by more than 31.
415 0 : sal_Bool SAL_CALL unicode::isWhiteSpace( const sal_Unicode ch) {
416 0 : return (ch != 0xa0 && isSpace(ch)) || (ch <= 0x1F && (bit(ch) & (CONTROLSPACE)));
417 : }
418 :
// Maps an ICU UScriptCode to a ScriptType class (WEAK, LATIN, ASIAN or
// COMPLEX) via a table indexed by the numeric script code.
//
// eScript - script code to classify.
//
// Returns ScriptType::WEAK for codes below USCRIPT_COMMON, the table entry
// for codes inside the table, and ScriptType::COMPLEX for codes past its end.
// The "// N" comments inside the table mark the index of the first entry on
// the following row.
// NOTE(review): the table order must follow the UScriptCode enumeration of
// the ICU version in use - verify against unicode/uscript.h when ICU changes.
419 597 : sal_Int16 SAL_CALL unicode::getScriptClassFromUScriptCode(UScriptCode eScript)
420 : {
421 : //See unicode/uscript.h
422 : static sal_Int16 scriptTypes[] =
423 : {
424 : ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX,
425 : ScriptType::ASIAN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
426 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::LATIN,
427 : // 15
428 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN, ScriptType::COMPLEX,
429 : ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX,
430 : ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
431 : // 30
432 : ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
433 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
434 : ScriptType::LATIN, ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
435 : // 45
436 : ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX,
437 : ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN,
438 : ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
439 : // 60
440 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
441 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX,
442 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN, ScriptType::ASIAN,
443 : // 75
444 : ScriptType::COMPLEX, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
445 : ScriptType::LATIN, ScriptType::LATIN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
446 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
447 : // 90
448 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
449 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
450 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK, ScriptType::COMPLEX,
451 : // 105
452 : ScriptType::ASIAN, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
453 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
454 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::ASIAN,
455 : // 120
456 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
457 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::WEAK, ScriptType::WEAK,
458 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
459 : // 135
460 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
461 : ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX, ScriptType::COMPLEX,
462 : ScriptType::COMPLEX,
463 : ScriptType::WEAK
464 : };
465 :
466 : sal_Int16 nRet;
// Codes below USCRIPT_COMMON are not valid table indices.
467 597 : if (eScript < USCRIPT_COMMON)
468 0 : nRet = ScriptType::WEAK;
// Codes added by newer ICU releases fall past the end of the table.
469 597 : else if (static_cast<size_t>(eScript) >= SAL_N_ELEMENTS(scriptTypes))
470 0 : nRet = ScriptType::COMPLEX; // anything new is going to be pretty wild
471 : else
472 597 : nRet = scriptTypes[eScript];
473 597 : return nRet;
474 : }
475 :
// Returns a representative language code for the given ICU script code.
//
// eScript - script to look up.
//
// Each handled script is mapped to one fixed language code string. "zxx" is
// returned for the limit/invalid codes and for mathematical notation and
// symbols, "und" for common, inherited, unwritten and unknown, and "mis"
// for scripts with no assigned language code yet. The switch has no default
// label, so an unhandled script code returns the default-constructed OString.
//
// Script codes inside the #if blocks exist only in newer ICU releases and
// are compiled in only when the ICU version macros allow it.
476 0 : OString SAL_CALL unicode::getExemplerLanguageForUScriptCode(UScriptCode eScript)
477 : {
478 0 : OString sRet;
479 0 : switch (eScript)
480 : {
481 : case USCRIPT_CODE_LIMIT:
482 : case USCRIPT_INVALID_CODE:
483 0 : sRet = "zxx";
484 0 : break;
485 : case USCRIPT_COMMON:
486 : case USCRIPT_INHERITED:
487 0 : sRet = "und";
488 0 : break;
489 : case USCRIPT_MATHEMATICAL_NOTATION:
490 : case USCRIPT_SYMBOLS:
491 0 : sRet = "zxx";
492 0 : break;
493 : case USCRIPT_UNWRITTEN_LANGUAGES:
494 : case USCRIPT_UNKNOWN:
495 0 : sRet = "und";
496 0 : break;
497 : case USCRIPT_NABATAEAN: //no language with an assigned code yet
498 0 : sRet = "mis";
499 0 : break;
500 : case USCRIPT_PALMYRENE: //no language with an assigned code yet
501 0 : sRet = "mis";
502 0 : break;
503 : case USCRIPT_ARABIC:
504 0 : sRet = "ar";
505 0 : break;
506 : case USCRIPT_ARMENIAN:
507 0 : sRet = "hy";
508 0 : break;
509 : case USCRIPT_BENGALI:
510 0 : sRet = "bn";
511 0 : break;
512 : case USCRIPT_BOPOMOFO:
513 0 : sRet = "zh";
514 0 : break;
515 : case USCRIPT_CHEROKEE:
516 0 : sRet = "chr";
517 0 : break;
518 : case USCRIPT_COPTIC:
519 0 : sRet = "cop";
520 0 : break;
521 : case USCRIPT_CYRILLIC:
522 0 : sRet = "ru";
523 0 : break;
524 : case USCRIPT_DESERET:
525 0 : sRet = "en";
526 0 : break;
527 : case USCRIPT_DEVANAGARI:
528 0 : sRet = "hi";
529 0 : break;
530 : case USCRIPT_ETHIOPIC:
531 0 : sRet = "am";
532 0 : break;
533 : case USCRIPT_GEORGIAN:
534 0 : sRet = "ka";
535 0 : break;
536 : case USCRIPT_GOTHIC:
537 0 : sRet = "got";
538 0 : break;
539 : case USCRIPT_GREEK:
540 0 : sRet = "el";
541 0 : break;
542 : case USCRIPT_GUJARATI:
543 0 : sRet = "gu";
544 0 : break;
545 : case USCRIPT_GURMUKHI:
546 0 : sRet = "pa";
547 0 : break;
548 : case USCRIPT_HAN:
549 0 : sRet = "zh";
550 0 : break;
551 : case USCRIPT_HANGUL:
552 0 : sRet = "ko";
553 0 : break;
554 : case USCRIPT_HEBREW:
// ISO 639-1 code for Hebrew is "he"; "hr" is Croatian.
555 0 : sRet = "he";
556 0 : break;
557 : case USCRIPT_HIRAGANA:
558 0 : sRet = "ja";
559 0 : break;
560 : case USCRIPT_KANNADA:
561 0 : sRet = "kn";
562 0 : break;
563 : case USCRIPT_KATAKANA:
564 0 : sRet = "ja";
565 0 : break;
566 : case USCRIPT_KHMER:
567 0 : sRet = "km";
568 0 : break;
569 : case USCRIPT_LAO:
570 0 : sRet = "lo";
571 0 : break;
572 : case USCRIPT_LATIN:
573 0 : sRet = "en";
574 0 : break;
575 : case USCRIPT_MALAYALAM:
576 0 : sRet = "ml";
577 0 : break;
578 : case USCRIPT_MONGOLIAN:
579 0 : sRet = "mn";
580 0 : break;
581 : case USCRIPT_MYANMAR:
582 0 : sRet = "my";
583 0 : break;
584 : case USCRIPT_OGHAM:
585 0 : sRet = "pgl";
586 0 : break;
587 : case USCRIPT_OLD_ITALIC:
588 0 : sRet = "osc";
589 0 : break;
590 : case USCRIPT_ORIYA:
591 0 : sRet = "or";
592 0 : break;
593 : case USCRIPT_RUNIC:
594 0 : sRet = "ang";
595 0 : break;
596 : case USCRIPT_SINHALA:
597 0 : sRet = "si";
598 0 : break;
599 : case USCRIPT_SYRIAC:
600 0 : sRet = "syr";
601 0 : break;
602 : case USCRIPT_TAMIL:
603 0 : sRet = "ta";
604 0 : break;
605 : case USCRIPT_TELUGU:
606 0 : sRet = "te";
607 0 : break;
608 : case USCRIPT_THAANA:
609 0 : sRet = "dv";
610 0 : break;
611 : case USCRIPT_THAI:
612 0 : sRet = "th";
613 0 : break;
614 : case USCRIPT_TIBETAN:
615 0 : sRet = "bo";
616 0 : break;
617 : case USCRIPT_CANADIAN_ABORIGINAL:
618 0 : sRet = "iu";
619 0 : break;
620 : case USCRIPT_YI:
621 0 : sRet = "ii";
622 0 : break;
623 : case USCRIPT_TAGALOG:
624 0 : sRet = "tl";
625 0 : break;
626 : case USCRIPT_HANUNOO:
627 0 : sRet = "hnn";
628 0 : break;
629 : case USCRIPT_BUHID:
630 0 : sRet = "bku";
631 0 : break;
632 : case USCRIPT_TAGBANWA:
633 0 : sRet = "tbw";
634 0 : break;
635 : case USCRIPT_BRAILLE:
636 0 : sRet = "en";
637 0 : break;
638 : case USCRIPT_CYPRIOT:
639 0 : sRet = "ecy";
640 0 : break;
641 : case USCRIPT_LIMBU:
642 0 : sRet = "lif";
643 0 : break;
644 : case USCRIPT_LINEAR_B:
645 0 : sRet = "gmy";
646 0 : break;
647 : case USCRIPT_OSMANYA:
648 0 : sRet = "so";
649 0 : break;
650 : case USCRIPT_SHAVIAN:
651 0 : sRet = "en";
652 0 : break;
653 : case USCRIPT_TAI_LE:
654 0 : sRet = "tdd";
655 0 : break;
656 : case USCRIPT_UGARITIC:
657 0 : sRet = "uga";
658 0 : break;
659 : case USCRIPT_KATAKANA_OR_HIRAGANA:
660 0 : sRet = "ja";
661 0 : break;
662 : case USCRIPT_BUGINESE:
663 0 : sRet = "bug";
664 0 : break;
665 : case USCRIPT_GLAGOLITIC:
// NOTE(review): "ch" is the ISO 639-1 code for Chamorro; "cu" (Church
// Slavic) may have been intended here - confirm before changing.
666 0 : sRet = "ch";
667 0 : break;
668 : case USCRIPT_KHAROSHTHI:
669 0 : sRet = "pra";
670 0 : break;
671 : case USCRIPT_SYLOTI_NAGRI:
672 0 : sRet = "syl";
673 0 : break;
674 : case USCRIPT_NEW_TAI_LUE:
675 0 : sRet = "khb";
676 0 : break;
677 : case USCRIPT_TIFINAGH:
678 0 : sRet = "tmh";
679 0 : break;
680 : case USCRIPT_OLD_PERSIAN:
681 0 : sRet = "peo";
682 0 : break;
683 : case USCRIPT_BALINESE:
684 0 : sRet = "ban";
685 0 : break;
686 : case USCRIPT_BATAK:
687 0 : sRet = "btk";
688 0 : break;
689 : case USCRIPT_BLISSYMBOLS:
690 0 : sRet = "en";
691 0 : break;
692 : case USCRIPT_BRAHMI:
693 0 : sRet = "pra";
694 0 : break;
695 : case USCRIPT_CHAM:
696 0 : sRet = "cja";
697 0 : break;
698 : case USCRIPT_CIRTH:
699 0 : sRet = "sjn";
700 0 : break;
701 : case USCRIPT_OLD_CHURCH_SLAVONIC_CYRILLIC:
702 0 : sRet = "cu";
703 0 : break;
704 : case USCRIPT_DEMOTIC_EGYPTIAN:
705 : case USCRIPT_HIERATIC_EGYPTIAN:
706 : case USCRIPT_EGYPTIAN_HIEROGLYPHS:
707 0 : sRet = "egy";
708 0 : break;
709 : case USCRIPT_KHUTSURI:
710 0 : sRet = "ka";
711 0 : break;
712 : case USCRIPT_SIMPLIFIED_HAN:
713 0 : sRet = "zh";
714 0 : break;
715 : case USCRIPT_TRADITIONAL_HAN:
716 0 : sRet = "zh";
717 0 : break;
718 : case USCRIPT_PAHAWH_HMONG:
719 0 : sRet = "blu";
720 0 : break;
721 : case USCRIPT_OLD_HUNGARIAN:
722 0 : sRet = "ohu";
723 0 : break;
724 : case USCRIPT_HARAPPAN_INDUS:
725 0 : sRet = "xiv";
726 0 : break;
727 : case USCRIPT_JAVANESE:
728 0 : sRet = "kaw";
729 0 : break;
730 : case USCRIPT_KAYAH_LI:
731 0 : sRet = "eky";
732 0 : break;
733 : case USCRIPT_LATIN_FRAKTUR:
734 0 : sRet = "de";
735 0 : break;
736 : case USCRIPT_LATIN_GAELIC:
737 0 : sRet = "ga";
738 0 : break;
739 : case USCRIPT_LEPCHA:
740 0 : sRet = "lep";
741 0 : break;
742 : case USCRIPT_LINEAR_A:
743 0 : sRet = "ecr";
744 0 : break;
745 : case USCRIPT_MANDAIC:
746 0 : sRet = "mic";
747 0 : break;
748 : case USCRIPT_MAYAN_HIEROGLYPHS:
749 0 : sRet = "myn";
750 0 : break;
751 : case USCRIPT_MEROITIC:
752 0 : sRet = "xmr";
753 0 : break;
754 : case USCRIPT_NKO:
755 0 : sRet = "nqo";
756 0 : break;
757 : case USCRIPT_ORKHON:
758 0 : sRet = "otk";
759 0 : break;
760 : case USCRIPT_OLD_PERMIC:
761 0 : sRet = "kv";
762 0 : break;
763 : case USCRIPT_PHAGS_PA:
764 0 : sRet = "xng";
765 0 : break;
766 : case USCRIPT_PHOENICIAN:
767 0 : sRet = "phn";
768 0 : break;
769 : case USCRIPT_PHONETIC_POLLARD:
770 0 : sRet = "hmd";
771 0 : break;
772 : case USCRIPT_RONGORONGO:
773 0 : sRet = "rap";
774 0 : break;
775 : case USCRIPT_SARATI:
776 0 : sRet = "qya";
777 0 : break;
778 : case USCRIPT_ESTRANGELO_SYRIAC:
779 0 : sRet = "syr";
780 0 : break;
781 : case USCRIPT_WESTERN_SYRIAC:
782 0 : sRet = "tru";
783 0 : break;
784 : case USCRIPT_EASTERN_SYRIAC:
785 0 : sRet = "aii";
786 0 : break;
787 : case USCRIPT_TENGWAR:
788 0 : sRet = "sjn";
789 0 : break;
790 : case USCRIPT_VAI:
791 0 : sRet = "vai";
792 0 : break;
793 : case USCRIPT_VISIBLE_SPEECH:
794 0 : sRet = "en";
795 0 : break;
796 : case USCRIPT_CUNEIFORM:
797 0 : sRet = "akk";
798 0 : break;
799 : case USCRIPT_CARIAN:
800 0 : sRet = "xcr";
801 0 : break;
802 : case USCRIPT_JAPANESE:
803 0 : sRet = "ja";
804 0 : break;
805 : case USCRIPT_LANNA:
806 0 : sRet = "nod";
807 0 : break;
808 : case USCRIPT_LYCIAN:
809 0 : sRet = "xlc";
810 0 : break;
811 : case USCRIPT_LYDIAN:
812 0 : sRet = "xld";
813 0 : break;
814 : case USCRIPT_OL_CHIKI:
815 0 : sRet = "sat";
816 0 : break;
817 : case USCRIPT_REJANG:
818 0 : sRet = "rej";
819 0 : break;
820 : case USCRIPT_SAURASHTRA:
821 0 : sRet = "saz";
822 0 : break;
823 : case USCRIPT_SIGN_WRITING:
824 0 : sRet = "en";
825 0 : break;
826 : case USCRIPT_SUNDANESE:
827 0 : sRet = "su";
828 0 : break;
829 : case USCRIPT_MOON:
830 0 : sRet = "en";
831 0 : break;
832 : case USCRIPT_MEITEI_MAYEK:
833 0 : sRet = "mni";
834 0 : break;
835 : case USCRIPT_IMPERIAL_ARAMAIC:
836 0 : sRet = "arc";
837 0 : break;
838 : case USCRIPT_AVESTAN:
839 0 : sRet = "ae";
840 0 : break;
841 : case USCRIPT_CHAKMA:
842 0 : sRet = "ccp";
843 0 : break;
844 : case USCRIPT_KOREAN:
845 0 : sRet = "ko";
846 0 : break;
847 : case USCRIPT_KAITHI:
848 0 : sRet = "awa";
849 0 : break;
850 : case USCRIPT_MANICHAEAN:
851 0 : sRet = "xmn";
852 0 : break;
853 : case USCRIPT_INSCRIPTIONAL_PAHLAVI:
854 : case USCRIPT_PSALTER_PAHLAVI:
855 : case USCRIPT_BOOK_PAHLAVI:
856 : case USCRIPT_INSCRIPTIONAL_PARTHIAN:
857 0 : sRet = "xpr";
858 0 : break;
859 : case USCRIPT_SAMARITAN:
860 0 : sRet = "heb";
861 0 : break;
862 : case USCRIPT_TAI_VIET:
863 0 : sRet = "blt";
864 0 : break;
865 : case USCRIPT_BAMUM:
866 0 : sRet = "bax";
867 0 : break;
868 : case USCRIPT_LISU:
869 0 : sRet = "lis";
870 0 : break;
871 : case USCRIPT_NAKHI_GEBA:
872 0 : sRet = "nxq";
873 0 : break;
874 : case USCRIPT_OLD_SOUTH_ARABIAN:
875 0 : sRet = "xsa";
876 0 : break;
877 : case USCRIPT_BASSA_VAH:
878 0 : sRet = "bsq";
879 0 : break;
880 : case USCRIPT_DUPLOYAN_SHORTAND:
881 0 : sRet = "fr";
882 0 : break;
883 : case USCRIPT_ELBASAN:
884 0 : sRet = "sq";
885 0 : break;
886 : case USCRIPT_GRANTHA:
887 0 : sRet = "ta";
888 0 : break;
889 : case USCRIPT_KPELLE:
890 0 : sRet = "kpe";
891 0 : break;
892 : case USCRIPT_LOMA:
893 0 : sRet = "lom";
894 0 : break;
895 : case USCRIPT_MENDE:
896 0 : sRet = "men";
897 0 : break;
898 : case USCRIPT_MEROITIC_CURSIVE:
899 0 : sRet = "xmr";
900 0 : break;
901 : case USCRIPT_OLD_NORTH_ARABIAN:
902 0 : sRet = "xna";
903 0 : break;
904 : case USCRIPT_SINDHI:
905 0 : sRet = "sd";
906 0 : break;
907 : case USCRIPT_WARANG_CITI:
908 0 : sRet = "hoc";
909 0 : break;
910 : #if (U_ICU_VERSION_MAJOR_NUM > 4) || (U_ICU_VERSION_MAJOR_NUM == 4 && U_ICU_VERSION_MINOR_NUM >= 8)
911 : case USCRIPT_AFAKA:
912 0 : sRet = "djk";
913 0 : break;
914 : case USCRIPT_JURCHEN:
915 0 : sRet = "juc";
916 0 : break;
917 : case USCRIPT_MRO:
918 0 : sRet = "cmr";
919 0 : break;
920 : case USCRIPT_NUSHU: //no language with an assigned code yet
921 0 : sRet = "mis";
922 0 : break;
923 : case USCRIPT_SHARADA:
924 0 : sRet = "sa";
925 0 : break;
926 : case USCRIPT_SORA_SOMPENG:
927 0 : sRet = "srb";
928 0 : break;
929 : case USCRIPT_TAKRI:
930 0 : sRet = "doi";
931 0 : break;
932 : case USCRIPT_TANGUT:
933 0 : sRet = "txg";
934 0 : break;
935 : case USCRIPT_WOLEAI:
936 0 : sRet = "woe";
937 0 : break;
938 : #endif
939 : #if (U_ICU_VERSION_MAJOR_NUM > 4)
940 : case USCRIPT_ANATOLIAN_HIEROGLYPHS:
941 : sRet = "hlu";
942 : break;
943 : case USCRIPT_KHOJKI:
944 : sRet = "gu";
945 : break;
946 : case USCRIPT_TIRHUTA:
947 : sRet = "mai";
948 : break;
949 : #endif
950 : }
951 0 : return sRet;
952 : }
953 :
954 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|