Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : // prevent internal compiler error with MSVC6SP3
21 : #include <utility>
22 : #include <i18nutil/widthfolding.hxx>
23 : #include <comphelper/string.hxx>
24 : #include "widthfolding_data.h"
25 :
26 : using namespace com::sun::star::uno;
27 :
28 : using ::rtl::OUString;
29 :
30 : namespace com { namespace sun { namespace star { namespace i18n {
31 :
32 0 : sal_Unicode widthfolding::decompose_ja_voiced_sound_marksChar2Char (sal_Unicode inChar)
33 : {
34 0 : if (0x30a0 <= inChar && inChar <= 0x30ff) {
35 0 : sal_Int16 i = inChar - 0x3040;
36 0 : if (decomposition_table[i].decomposited_character_1)
37 0 : return 0xFFFF;
38 : }
39 0 : return inChar;
40 : }
41 :
42 : /**
43 : * Decompose Japanese specific voiced and semi-voiced sound marks.
44 : */
45 0 : OUString widthfolding::decompose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset )
46 : {
47 : // Create a string buffer which can hold nCount * 2 + 1 characters.
48 : // Its size may become double of nCount.
49 : // The reference count is 1 now.
50 0 : rtl_uString * newStr = rtl_uString_alloc(nCount * 2);
51 :
52 0 : sal_Int32 *p = NULL;
53 0 : sal_Int32 position = 0;
54 0 : if (useOffset) {
55 : // Allocate double of nCount length to offset argument.
56 0 : offset.realloc( nCount * 2 );
57 0 : p = offset.getArray();
58 0 : position = startPos;
59 : }
60 :
61 : // Prepare pointers of unicode character arrays.
62 0 : const sal_Unicode* src = inStr.getStr() + startPos;
63 0 : sal_Unicode* dst = newStr->buffer;
64 :
65 : // Decomposition: GA --> KA + voice-mark
66 0 : while (nCount -- > 0) {
67 0 : sal_Unicode c = *src++;
68 : // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
69 : // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
70 : // Hiragana is not applied to decomposition.
71 : // Only Katakana is applied to decomposition
72 0 : if (0x30a0 <= c && c <= 0x30ff) {
73 0 : int i = int(c - 0x3040);
74 0 : sal_Unicode first = decomposition_table[i].decomposited_character_1;
75 0 : if (first != 0x0000) {
76 0 : *dst ++ = first;
77 0 : *dst ++ = decomposition_table[i].decomposited_character_2; // second
78 0 : if (useOffset) {
79 0 : *p ++ = position;
80 0 : *p ++ = position ++;
81 : }
82 0 : continue;
83 : }
84 : }
85 0 : *dst ++ = c;
86 0 : if (useOffset)
87 0 : *p ++ = position ++;
88 : }
89 0 : *dst = (sal_Unicode) 0;
90 :
91 0 : newStr->length = sal_Int32(dst - newStr->buffer);
92 0 : if (useOffset)
93 0 : offset.realloc(newStr->length);
94 0 : return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
95 : }
96 :
97 0 : oneToOneMapping& widthfolding::getfull2halfTable(void)
98 : {
99 0 : static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_NORMAL);
100 0 : table.makeIndex();
101 0 : return table;
102 : }
103 :
104 : /**
105 : * Compose Japanese specific voiced and semi-voiced sound marks.
106 : */
107 0 : OUString widthfolding::compose_ja_voiced_sound_marks (const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset, sal_Bool useOffset, sal_Int32 nFlags )
108 : {
109 : // Create a string buffer which can hold nCount + 1 characters.
110 : // Its size may become equal to nCount or smaller.
111 : // The reference count is 1 now.
112 0 : rtl_uString * newStr = rtl_uString_alloc(nCount);
113 :
114 : // Prepare pointers of unicode character arrays.
115 0 : const sal_Unicode* src = inStr.getStr() + startPos;
116 0 : sal_Unicode* dst = newStr->buffer;
117 :
118 : // This conversion algorithm requires at least one character.
119 0 : if (nCount > 0) {
120 :
121 : // .. .. KA VOICE .. ..
122 : // ^ ^
123 : // previousChar currentChar
124 : // ^
125 : // position
126 : //
127 : // will be converted to
128 : // .. .. GA .. ..
129 :
130 0 : sal_Int32 *p = NULL;
131 0 : sal_Int32 position = 0;
132 0 : if (useOffset) {
133 : // Allocate nCount length to offset argument.
134 0 : offset.realloc( nCount );
135 0 : p = offset.getArray();
136 0 : position = startPos;
137 : }
138 :
139 : //
140 0 : sal_Unicode previousChar = *src ++;
141 : sal_Unicode currentChar;
142 :
143 : // Composition: KA + voice-mark --> GA
144 0 : while (-- nCount > 0) {
145 0 : currentChar = *src ++;
146 : // see http://charts.unicode.org/Web/U3040.html Hiragana (U+3040..U+309F)
147 : // see http://charts.unicode.org/Web/U30A0.html Katakana (U+30A0..U+30FF)
148 : // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
149 : // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
150 : // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
151 : // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
152 0 : int j = currentChar - 0x3099; // 0x3099, 0x309a, 0x309b, 0x309c ?
153 :
154 0 : if (2 <= j && j <= 3) // 0x309b or 0x309c
155 0 : j -= 2;
156 :
157 0 : if (0 <= j && j <= 1) {
158 : // 0 addresses a code point regarding 0x3099 or 0x309b (voiced sound mark),
159 : // 1 is 0x309a or 0x309c (semi-voiced sound mark)
160 0 : int i = int(previousChar - 0x3040); // i acts as an index of array
161 0 : sal_Bool bCompose = sal_False;
162 :
163 0 : if (0 <= i && i <= (0x30ff - 0x3040) && composition_table[i][j])
164 0 : bCompose = sal_True;
165 :
166 : // not to use combined KATAKANA LETTER VU
167 0 : if ( previousChar == 0x30a6 && (nFlags & WIDTHFOLDNIG_DONT_USE_COMBINED_VU) )
168 0 : bCompose = sal_False;
169 :
170 0 : if( bCompose ){
171 0 : if (useOffset) {
172 0 : position ++;
173 0 : *p ++ = position ++;
174 : }
175 0 : *dst ++ = composition_table[i][j];
176 0 : previousChar = *src ++;
177 0 : nCount --;
178 0 : continue;
179 : }
180 : }
181 0 : if (useOffset)
182 0 : *p ++ = position ++;
183 0 : *dst ++ = previousChar;
184 0 : previousChar = currentChar;
185 : }
186 :
187 0 : if (nCount == 0) {
188 0 : if (useOffset)
189 0 : *p = position;
190 0 : *dst ++ = previousChar;
191 : }
192 :
193 0 : *dst = (sal_Unicode) 0;
194 :
195 0 : newStr->length = sal_Int32(dst - newStr->buffer);
196 : }
197 0 : if (useOffset)
198 0 : offset.realloc(newStr->length);
199 0 : return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
200 : }
201 :
202 6 : oneToOneMapping& widthfolding::gethalf2fullTable(void)
203 : {
204 6 : static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_NORMAL);
205 6 : table.makeIndex();
206 6 : return table;
207 : }
208 :
209 0 : sal_Unicode widthfolding::getCompositionChar(sal_Unicode c1, sal_Unicode c2)
210 : {
211 0 : return composition_table[c1 - 0x3040][c2 - 0x3099];
212 : }
213 :
214 :
215 0 : oneToOneMapping& widthfolding::getfull2halfTableForASC()
216 : {
217 0 : static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_ASC_FUNCTION);
218 0 : table.makeIndex();
219 :
220 : // bluedwarf: dirty hack!
221 : // There is an exception. Additional conversion is required following:
222 : // 0xFFE5 (FULLWIDTH YEN SIGN) --> 0x005C (REVERSE SOLIDUS)
223 : //
224 : // See the following page for detail:
225 : // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
226 : int i, j, high, low;
227 0 : int n = sizeof(full2halfASCException) / sizeof(UnicodePairWithFlag);
228 0 : for( i = 0; i < n; i++ )
229 : {
230 0 : high = (full2halfASCException[i].first >> 8) & 0xFF;
231 0 : low = (full2halfASCException[i].first) & 0xFF;
232 :
233 0 : if( !table.mpIndex[high] )
234 : {
235 0 : table.mpIndex[high] = new UnicodePairWithFlag*[256];
236 :
237 0 : for( j = 0; j < 256; j++ )
238 0 : table.mpIndex[high][j] = NULL;
239 : }
240 0 : table.mpIndex[high][low] = &full2halfASCException[i];
241 : }
242 :
243 0 : return table;
244 : }
245 :
246 0 : oneToOneMapping& widthfolding::gethalf2fullTableForJIS()
247 : {
248 0 : static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_JIS_FUNCTION);
249 0 : table.makeIndex();
250 :
251 : // bluedwarf: dirty hack!
252 : // There are some exceptions. Additional conversion are required following:
253 : // 0x0022 (QUOTATION MARK) --> 0x201D (RIGHT DOUBLE QUOTATION MARK)
254 : // 0x0027 (APOSTROPHE) --> 0x2019 (RIGHT SINGLE QUOTATION MARK)
255 : // 0x005C (REVERSE SOLIDUS) --> 0xFFE5 (FULLWIDTH YEN SIGN)
256 : // 0x0060 (GRAVE ACCENT) --> 0x2018 (LEFT SINGLE QUOTATION MARK)
257 : //
258 : // See the following page for detail:
259 : // http://wiki.services.openoffice.org/wiki/Calc/Features/JIS_and_ASC_functions
260 : int i, j, high, low;
261 0 : int n = sizeof(half2fullJISException) / sizeof(UnicodePairWithFlag);
262 0 : for( i = 0; i < n; i++ )
263 : {
264 0 : high = (half2fullJISException[i].first >> 8) & 0xFF;
265 0 : low = (half2fullJISException[i].first) & 0xFF;
266 :
267 0 : if( !table.mpIndex[high] )
268 : {
269 0 : table.mpIndex[high] = new UnicodePairWithFlag*[256];
270 :
271 0 : for( j = 0; j < 256; j++ )
272 0 : table.mpIndex[high][j] = NULL;
273 : }
274 0 : table.mpIndex[high][low] = &half2fullJISException[i];
275 : }
276 :
277 0 : return table;
278 : }
279 :
280 0 : oneToOneMapping& widthfolding::getfullKana2halfKanaTable()
281 : {
282 0 : static oneToOneMappingWithFlag table(full2half, sizeof(full2half), FULL2HALF_KATAKANA_ONLY);
283 0 : table.makeIndex();
284 0 : return table;
285 : }
286 :
287 0 : oneToOneMapping& widthfolding::gethalfKana2fullKanaTable()
288 : {
289 0 : static oneToOneMappingWithFlag table(half2full, sizeof(half2full), HALF2FULL_KATAKANA_ONLY);
290 0 : table.makeIndex();
291 0 : return table;
292 : }
293 :
294 : } } } }
295 :
296 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|