Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : // prevent internal compiler error with MSVC6SP3
21 : #include <utility>
22 : #include <i18nutil/widthfolding.hxx>
23 : #include <comphelper/string.hxx>
24 : #include "widthfolding_data.h"
25 :
26 : using namespace com::sun::star::uno;
27 :
28 :
29 : namespace com { namespace sun { namespace star { namespace i18n {
30 :
31 0 : sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar)
32 : {
33 0 : if (0x30a0 <= inChar && inChar <= 0x30ff) {
34 0 : sal_Int16 i = inChar - 0x3040;
35 0 : if (decomposition_table[i].decomposited_character_1)
36 0 : return 0xFFFF;
37 : }
38 0 : return inChar;
39 : }
40 :
41 : /**
42 : * Decompose Japanese specific voiced and semi-voiced sound marks.
43 : */
44 0 : OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset )
45 : {
46 : // Create a string buffer which can hold nCount * 2 + 1 characters.
47 : // Its size may become double of nCount.
48 : // The reference count is 1 now.
49 0 : rtl_uString * newStr = rtl_uString_alloc(nCount * 2);
50 :
51 0 : sal_Int32 *p = NULL;
52 0 : sal_Int32 position = 0;
53 0 : if (useOffset) {
54 : // Allocate double of nCount length to offset argument.
55 0 : offset.realloc( nCount * 2 );
56 0 : p = offset.getArray();
57 0 : position = startPos;
58 : }
59 :
60 : // Prepare pointers of unicode character arrays.
61 0 : const sal_Unicode* src = inStr.getStr() + startPos;
62 0 : sal_Unicode* dst = newStr->buffer;
63 :
64 : // Decomposition: GA --> KA + voice-mark
65 0 : while (nCount -- > 0) {
66 0 : sal_Unicode c = *src++;
67 : // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
68 : // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
69 : // Hiragana is not applied to decomposition.
70 : // Only Katakana is applied to decomposition
71 0 : if (0x30a0 <= c && c <= 0x30ff) {
72 0 : int i = int(c - 0x3040);
73 0 : sal_Unicode first = decomposition_table[i].decomposited_character_1;
74 0 : if (first != 0x0000) {
75 0 : *dst ++ = first;
76 0 : *dst ++ = decomposition_table[i].decomposited_character_2; // second
77 0 : if (useOffset) {
78 0 : *p ++ = position;
79 0 : *p ++ = position ++;
80 : }
81 0 : continue;
82 : }
83 : }
84 0 : *dst ++ = c;
85 0 : if (useOffset)
86 0 : *p ++ = position ++;
87 : }
88 0 : *dst = (sal_Unicode) 0;
89 :
90 0 : newStr->length = sal_Int32(dst - newStr->buffer);
91 0 : if (useOffset)
92 0 : offset.realloc(newStr->length);
93 0 : return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
94 : }
95 :
96 0 : oneToOneMapping& widthfolding::getfull2halfTable(void)
97 : {
98 0 : static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL);
99 0 : table.makeIndex();
100 0 : return table;
101 : }
102 :
103 : /**
104 : * Compose Japanese specific voiced and semi-voiced sound marks.
105 : */
106 0 : OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, bool useOffset, sal_Int32 nFlags )
107 : {
108 : // Create a string buffer which can hold nCount + 1 characters.
109 : // Its size may become equal to nCount or smaller.
110 : // The reference count is 1 now.
111 0 : rtl_uString * newStr = rtl_uString_alloc(nCount);
112 :
113 : // Prepare pointers of unicode character arrays.
114 0 : const sal_Unicode* src = inStr.getStr() + startPos;
115 0 : sal_Unicode* dst = newStr->buffer;
116 :
117 : // This conversion algorithm requires at least one character.
118 0 : if (nCount > 0) {
119 :
120 : // .. .. KA VOICE .. ..
121 : // ^ ^
122 : // previousChar currentChar
123 : // ^
124 : // position
125 : //
126 : // will be converted to
127 : // .. .. GA .. ..
128 :
129 0 : sal_Int32 *p = NULL;
130 0 : sal_Int32 position = 0;
131 0 : if (useOffset) {
132 : // Allocate nCount length to offset argument.
133 0 : offset.realloc( nCount );
134 0 : p = offset.getArray();
135 0 : position = startPos;
136 : }
137 :
138 : //
139 0 : sal_Unicode previousChar = *src ++;
140 : sal_Unicode currentChar;
141 :
142 : // Composition: KA + voice-mark --> GA
143 0 : while (-- nCount > 0) {
144 0 : currentChar = *src ++;
145 : // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
146 : // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
147 : // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
148 : // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
149 : // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
150 : // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
151 0 : int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ?
152 :
153 0 : if (2 <= j && j <= 3) // 0x309b or 0x309c
154 0 : j -= 2;
155 :
156 0 : if (0 <= j && j <= 1) {
157 : // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark),
158 : // 1 is 0x309a or 0x309c (semi-voiced sound mark)
159 0 : int i = int(previousChar - 0x3040); // i acts as an index of array
160 0 : bool bCompose = false;
161 :
162 0 : if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j])
163 0 : bCompose = true;
164 :
165 : // not to use combined KATAKANA LETTER VU
166 0 : if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) )
167 0 : bCompose = false;
168 :
169 0 : if( bCompose ){
170 0 : if (useOffset) {
171 0 : position ++;
172 0 : *p ++ = position ++;
173 : }
174 0 : *dst ++ = composition_table[i][j];
175 0 : previousChar = *src ++;
176 0 : nCount --;
177 0 : continue;
178 : }
179 : }
180 0 : if (useOffset)
181 0 : *p ++ = position ++;
182 0 : *dst ++ = previousChar;
183 0 : previousChar = currentChar;
184 : }
185 :
186 0 : if (nCount == 0) {
187 0 : if (useOffset)
188 0 : *p = position;
189 0 : *dst ++ = previousChar;
190 : }
191 :
192 0 : *dst = (sal_Unicode) 0;
193 :
194 0 : newStr->length = sal_Int32(dst - newStr->buffer);
195 : }
196 0 : if (useOffset)
197 0 : offset.realloc(newStr->length);
198 0 : return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
199 : }
200 :
201 0 : oneToOneMapping& widthfolding::gethalf2fullTable(void)
202 : {
203 0 : static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL);
204 0 : table.makeIndex();
205 0 : return table;
206 : }
207 :
208 0 : sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2)
209 : {
210 0 : return composition_table[c1 - 0x3040][c2 - 0x3099];
211 : }
212 :
213 :
214 0 : oneToOneMapping& widthfolding::getfull2halfTableForASC()
215 : {
216 0 : static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION);
217 0 : table.makeIndex();
218 :
219 : // bluedwarf: dirty hack!
220 : // There is an exception. Additional conversion is required following:
221 : // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS)
222 : //
223 : // See the following page for detail:
224 : // http://wiki.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
225 : int i, j, high, low;
226 0 : int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag);
227 0 : for( i = 0; i < n; i++ )
228 : {
229 0 : high = (full2halfASCException[i].first >> 8) & 0xFF;
230 0 : low = (full2halfASCException[i].first) & 0xFF;
231 :
232 0 : if( !table.mpIndex[high] )
233 : {
234 0 : table.mpIndex[high] = new UnicodePairWithFlag*[256];
235 :
236 0 : for( j = 0; j < 256; j++ )
237 0 : table.mpIndex[high][j] = NULL;
238 : }
239 0 : table.mpIndex[high][low] = &full2halfASCException[i];
240 : }
241 :
242 0 : return table;
243 : }
244 :
245 0 : oneToOneMapping& widthfolding::gethalf2fullTableForJIS()
246 : {
247 0 : static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION);
248 0 : table.makeIndex();
249 :
250 : // bluedwarf: dirty hack!
251 : // There are some exceptions. Additional conversion are required following:
252 : // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK)
253 : // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK)
254 : // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN)
255 : // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK)
256 : //
257 : // See the following page for detail:
258 : // http://wiki.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
259 : int i, j, high, low;
260 0 : int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag);
261 0 : for( i = 0; i < n; i++ )
262 : {
263 0 : high = (half2fullJISException[i].first >> 8) & 0xFF;
264 0 : low = (half2fullJISException[i].first) & 0xFF;
265 :
266 0 : if( !table.mpIndex[high] )
267 : {
268 0 : table.mpIndex[high] = new UnicodePairWithFlag*[256];
269 :
270 0 : for( j = 0; j < 256; j++ )
271 0 : table.mpIndex[high][j] = NULL;
272 : }
273 0 : table.mpIndex[high][low] = &half2fullJISException[i];
274 : }
275 :
276 0 : return table;
277 : }
278 :
279 0 : oneToOneMapping& widthfolding::getfullKana2halfKanaTable()
280 : {
281 0 : static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY);
282 0 : table.makeIndex();
283 0 : return table;
284 : }
285 :
286 0 : oneToOneMapping& widthfolding::gethalfKana2fullKanaTable()
287 : {
288 0 : static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY);
289 0 : table.makeIndex();
290 0 : return table;
291 : }
292 :
293 : } } } }
294 :
295 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|