Branch data Line data Source code
1 : : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : : /*
3 : : * This file is part of the LibreOffice project.
4 : : *
5 : : * This Source Code Form is subject to the terms of the Mozilla Public
6 : : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : : *
9 : : * This file incorporates work covered by the following license notice:
10 : : *
11 : : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : : * contributor license agreements. See the NOTICE file distributed
13 : : * with this work for additional information regarding copyright
14 : : * ownership. The ASF licenses this file to you under the Apache
15 : : * License, Version 2.0 (the "License"); you may not use this file
16 : : * except in compliance with the License. You may obtain a copy of
17 : : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : : */
19 : :
20 : : #include "i18nutil/casefolding.hxx"
21 : : #include "casefolding_data.h"
22 : : #include "i18nutil/widthfolding.hxx"
23 : :
24 : : using namespace com::sun::star::lang;
25 : : using namespace com::sun::star::uno;
26 : :
27 : : namespace com { namespace sun { namespace star { namespace i18n {
28 : :
29 : : static Mapping mapping_03a3[] = {{0, 1, {0x03c2, 0, 0}},{0, 1, {0x03c3, 0, 0}}};
30 : : static Mapping mapping_0307[] = {{0, 0, {0, 0, 0}},{0, 1, {0x0307, 0, 0}}};
31 : : static Mapping mapping_004a[] = {{0, 2, {0x006a, 0x0307, 0}},{0, 1, {0x006a, 0, 0}}};
32 : : static Mapping mapping_012e[] = {{0, 2, {0x012f, 0x0307, 0}},{0, 1, {0x012f, 0, 0}}};
33 : : static Mapping mapping_00cc[] = {{0, 3, {0x0069, 0x0307, 0x0300}},{0, 1, {0x00ec, 0, 0}}};
34 : : static Mapping mapping_00cd[] = {{0, 3, {0x0069, 0x0307, 0x0301}},{0, 1, {0x00ed, 0, 0}}};
35 : : static Mapping mapping_0128[] = {{0, 3, {0x0069, 0x0307, 0x0303}},{0, 1, {0x0129, 0, 0}}};
36 : : static Mapping mapping_0049[] = {{0, 2, {0x0069, 0x0307, 0}},{0, 1, {0x0131, 0, 0}},{0, 1, {0x0069, 0, 0}}};
37 : : static Mapping mapping_0069[] = {{0, 1, {0x0130, 0, 0}},{0, 1, {0x0049, 0, 0}}};
38 : : static Mapping mapping_0130[] = {{0, 1, {0x0069, 0, 0}},{0, 1, {0x0130, 0, 0}}};
39 : :
// True when the locale in scope (a variable named aLocale) has the given
// language code. The argument is parenthesized so that any expression can be
// passed, not only a plain literal.
#define langIs(lang) (aLocale.Language == (lang))

// True for the base letters that carry a dot above: small i and small j.
// Only this simple case is checked; the more complicated cases are not handled yet.
#define type_i(ch) ((ch) == 0x0069 || (ch) == 0x006a)

// True when the case mapping tables have an entry for ch and that entry is
// flagged as a cased letter.
#define cased_letter(ch) (CaseMappingIndex[(ch)>>8] >= 0 && (CaseMappingValue[(CaseMappingIndex[(ch)>>8] << 8) + ((ch)&0xff)].type & CasedLetter))

// For Lithuanian: combining marks treated as "accent above". When one of them
// follows a capital I or J, lowercasing has to add an explicit dot above.
#define accent_above(ch) (((ch) >= 0x0300 && (ch) <= 0x0314) || ((ch) >= 0x033D && (ch) <= 0x0344) || (ch) == 0x0346 || ((ch) >= 0x034A && (ch) <= 0x034C))
50 : :
51 : 127008 : Mapping& casefolding::getConditionalValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException)
52 : : {
53 [ - - - + : 127008 : switch(str[pos]) {
+ + - - -
- - ]
54 : : case 0x03a3:
55 : : // final_sigma (not followed by cased and preceded by cased character)
56 : : // DOES NOT check ignorable sequence yet (more complicated implementation).
57 [ # # ][ # # ]: 0 : return !(pos < len && cased_letter(str[pos+1])) && (pos > 0 && cased_letter(str[pos-1])) ?
58 [ # # ][ # # ]: 0 : mapping_03a3[0] : mapping_03a3[1];
[ # # ][ # # ]
59 : : case 0x0307:
60 : 0 : return (((nMappingType == MappingTypeLowerToUpper && langIs("lt")) ||
61 : 0 : (nMappingType == MappingTypeUpperToLower && (langIs("tr") || langIs("az")))) &&
62 : 0 : (pos > 0 && type_i(str[pos-1]))) ? // after_i
63 [ # # # # ]: 0 : mapping_0307[0] : mapping_0307[1];
[ # # # #
# # ][ # # ]
[ # # ][ # # ]
64 : : case 0x0130:
65 [ # # ][ # # ]: 0 : return (langIs("tr") || langIs("az")) ? mapping_0130[0] : mapping_0130[1];
66 : : case 0x0069:
67 [ + - ][ - + ]: 125789 : return (langIs("tr") || langIs("az")) ? mapping_0069[0] : mapping_0069[1];
68 : 1217 : case 0x0049: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_0049[0] :
69 [ # # ][ # # ]: 1217 : (langIs("tr") || langIs("az")) ? mapping_0049[1] : mapping_0049[2];
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ + - ][ - + ]
[ - + ]
70 [ - + ][ # # ]: 2 : case 0x004a: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_004a[0] : mapping_004a[1];
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ]
71 [ # # ][ # # ]: 0 : case 0x012e: return langIs("lt") && pos > len && accent_above(str[pos+1]) ? mapping_012e[0] : mapping_012e[1];
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ][ # # ]
[ # # ]
72 [ # # ]: 0 : case 0x00cc: return langIs("lt") ? mapping_00cc[0] : mapping_00cc[1];
73 [ # # ]: 0 : case 0x00cd: return langIs("lt") ? mapping_00cd[0] : mapping_00cd[1];
74 [ # # ]: 0 : case 0x0128: return langIs("lt") ? mapping_0128[0] : mapping_0128[1];
75 : : }
76 : : // Should not come here
77 [ # # ]: 127008 : throw RuntimeException();
78 : : }
79 : :
80 : 7536575 : Mapping& casefolding::getValue(const sal_Unicode* str, sal_Int32 pos, sal_Int32 len, Locale& aLocale, sal_uInt8 nMappingType) throw (RuntimeException)
81 : : {
82 : : static Mapping dummy = { 0, 1, { 0, 0, 0 } };
83 : 7536575 : sal_Int16 address = CaseMappingIndex[str[pos] >> 8] << 8;
84 : :
85 : 7536575 : dummy.map[0] = str[pos];
86 : :
87 [ + + ][ + + ]: 7536575 : if (address >= 0 && (CaseMappingValue[address += (str[pos] & 0xFF)].type & nMappingType)) {
[ + + ]
88 : 1107807 : sal_uInt8 type = CaseMappingValue[address].type;
89 [ + + ]: 1107807 : if (type & ValueTypeNotValue) {
90 [ + + ]: 127186 : if (CaseMappingValue[address].value == 0)
91 : 127006 : return getConditionalValue(str, pos, len, aLocale, nMappingType);
92 : : else {
93 [ + - ]: 185 : for (int map = CaseMappingValue[address].value;
94 : : map < CaseMappingValue[address].value + MaxCaseMappingExtras; map++) {
95 [ + + ]: 185 : if (CaseMappingExtra[map].type & nMappingType) {
96 [ + + ]: 180 : if (CaseMappingExtra[map].type & ValueTypeNotValue)
97 : 2 : return getConditionalValue(str, pos, len, aLocale, nMappingType);
98 : : else
99 : 178 : return CaseMappingExtra[map];
100 : : }
101 : : }
102 : : // Should not come here
103 [ # # ]: 0 : throw RuntimeException();
104 : : }
105 : : } else
106 : 980621 : dummy.map[0] = CaseMappingValue[address].value;
107 : : }
108 : 7536575 : return dummy;
109 : : }
110 : :
111 : : inline sal_Bool SAL_CALL
112 : 0 : is_ja_voice_sound_mark(sal_Unicode& current, sal_Unicode next)
113 : : {
114 : 0 : sal_Unicode c = 0;
115 : :
116 [ # # ][ # # ]: 0 : if ((next == 0x3099 || next == 0x309a) && ( (c = widthfolding::getCompositionChar(current, next)) != 0 ))
[ # # ][ # # ]
117 : 0 : current = c;
118 : 0 : return c != 0;
119 : : }
120 : :
121 : 32984 : sal_Unicode casefolding::getNextChar(const sal_Unicode *str, sal_Int32& idx, sal_Int32 len, MappingElement& e, Locale& aLocale, sal_uInt8 nMappingType, TransliterationModules moduleLoaded) throw (RuntimeException)
122 : : {
123 [ - + ]: 32984 : if( idx >= len )
124 : : {
125 : 0 : e = MappingElement();
126 : 0 : return 0;
127 : : }
128 : :
129 : : sal_Unicode c;
130 : :
131 [ + - ]: 32984 : if (moduleLoaded & TransliterationModules_IGNORE_CASE) {
132 [ + - ]: 32984 : if( e.current >= e.element.nmap ) {
133 [ + - ]: 32984 : e.element = getValue(str, idx++, len, aLocale, nMappingType);
134 : 32984 : e.current = 0;
135 : : }
136 : 32984 : c = e.element.map[e.current++];
137 : : } else {
138 : 0 : c = *(str + idx++);
139 : : }
140 : :
141 [ + + ]: 32984 : if (moduleLoaded & TransliterationModules_IGNORE_KANA) {
142 [ - + ][ # # ]: 17394 : if ((0x3040 <= c && c <= 0x3094) || (0x309d <= c && c <= 0x309f))
[ - + ][ # # ]
143 : 0 : c += 0x60;
144 : : }
145 : :
146 : : // composition: KA + voice-mark --> GA. see halfwidthToFullwidth.cxx for detail
147 [ + + ]: 32984 : if (moduleLoaded & TransliterationModules_IGNORE_WIDTH) {
148 [ + + ][ + - ]: 20114 : static oneToOneMapping& half2fullTable = widthfolding::gethalf2fullTable();
[ + - ][ # # ]
149 [ + - ]: 20114 : c = half2fullTable[c];
150 [ + + ][ - + ]: 20114 : if (0x3040 <= c && c <= 0x30ff && idx < len &&
[ # # ][ # # ]
[ - + ]
151 [ # # ][ # # ]: 0 : is_ja_voice_sound_mark(c, half2fullTable[*(str + idx)]))
152 : 0 : idx++;
153 : : }
154 : :
155 : 32984 : return c;
156 : : }
157 : :
158 : : } } } }
159 : :
160 : : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|