Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #include <utility>
22 : #include <comphelper/string.hxx>
23 : #define TRANSLITERATION_ProlongedSoundMark_ja_JP
24 : #include <transliteration_Ignore.hxx>
25 :
26 : using namespace com::sun::star::uno;
27 : using namespace com::sun::star::lang;
28 :
29 :
30 : namespace com { namespace sun { namespace star { namespace i18n {
31 :
32 : static const sal_Unicode table_normalwidth[] = {
33 : // 0x0000, // 0x3040
34 : 0x3041, // 0x3041 HIRAGANA LETTER SMALL A
35 : 0x3042, // 0x3042 HIRAGANA LETTER A
36 : 0x3043, // 0x3043 HIRAGANA LETTER SMALL I
37 : 0x3044, // 0x3044 HIRAGANA LETTER I
38 : 0x3045, // 0x3045 HIRAGANA LETTER SMALL U
39 : 0x3046, // 0x3046 HIRAGANA LETTER U
40 : 0x3047, // 0x3047 HIRAGANA LETTER SMALL E
41 : 0x3048, // 0x3048 HIRAGANA LETTER E
42 : 0x3049, // 0x3049 HIRAGANA LETTER SMALL O
43 : 0x304a, // 0x304a HIRAGANA LETTER O
44 : 0x3042, // 0x304b HIRAGANA LETTER KA
45 : 0x3042, // 0x304c HIRAGANA LETTER GA
46 : 0x3044, // 0x304d HIRAGANA LETTER KI
47 : 0x3044, // 0x304e HIRAGANA LETTER GI
48 : 0x3046, // 0x304f HIRAGANA LETTER KU
49 : 0x3046, // 0x3050 HIRAGANA LETTER GU
50 : 0x3048, // 0x3051 HIRAGANA LETTER KE
51 : 0x3048, // 0x3052 HIRAGANA LETTER GE
52 : 0x304a, // 0x3053 HIRAGANA LETTER KO
53 : 0x304a, // 0x3054 HIRAGANA LETTER GO
54 : 0x3042, // 0x3055 HIRAGANA LETTER SA
55 : 0x3042, // 0x3056 HIRAGANA LETTER ZA
56 : 0x3044, // 0x3057 HIRAGANA LETTER SI
57 : 0x3044, // 0x3058 HIRAGANA LETTER ZI
58 : 0x3046, // 0x3059 HIRAGANA LETTER SU
59 : 0x3046, // 0x305a HIRAGANA LETTER ZU
60 : 0x3048, // 0x305b HIRAGANA LETTER SE
61 : 0x3048, // 0x305c HIRAGANA LETTER ZE
62 : 0x304a, // 0x305d HIRAGANA LETTER SO
63 : 0x304a, // 0x305e HIRAGANA LETTER ZO
64 : 0x3042, // 0x305f HIRAGANA LETTER TA
65 : 0x3042, // 0x3060 HIRAGANA LETTER DA
66 : 0x3044, // 0x3061 HIRAGANA LETTER TI
67 : 0x3044, // 0x3062 HIRAGANA LETTER DI
68 : 0x3045, // 0x3063 HIRAGANA LETTER SMALL TU
69 : 0x3046, // 0x3064 HIRAGANA LETTER TU
70 : 0x3046, // 0x3065 HIRAGANA LETTER DU
71 : 0x3048, // 0x3066 HIRAGANA LETTER TE
72 : 0x3048, // 0x3067 HIRAGANA LETTER DE
73 : 0x304a, // 0x3068 HIRAGANA LETTER TO
74 : 0x304a, // 0x3069 HIRAGANA LETTER DO
75 : 0x3042, // 0x306a HIRAGANA LETTER NA
76 : 0x3044, // 0x306b HIRAGANA LETTER NI
77 : 0x3046, // 0x306c HIRAGANA LETTER NU
78 : 0x3048, // 0x306d HIRAGANA LETTER NE
79 : 0x304a, // 0x306e HIRAGANA LETTER NO
80 : 0x3042, // 0x306f HIRAGANA LETTER HA
81 : 0x3042, // 0x3070 HIRAGANA LETTER BA
82 : 0x3042, // 0x3071 HIRAGANA LETTER PA
83 : 0x3044, // 0x3072 HIRAGANA LETTER HI
84 : 0x3044, // 0x3073 HIRAGANA LETTER BI
85 : 0x3044, // 0x3074 HIRAGANA LETTER PI
86 : 0x3046, // 0x3075 HIRAGANA LETTER HU
87 : 0x3046, // 0x3076 HIRAGANA LETTER BU
88 : 0x3046, // 0x3077 HIRAGANA LETTER PU
89 : 0x3048, // 0x3078 HIRAGANA LETTER HE
90 : 0x3048, // 0x3079 HIRAGANA LETTER BE
91 : 0x3048, // 0x307a HIRAGANA LETTER PE
92 : 0x304a, // 0x307b HIRAGANA LETTER HO
93 : 0x304a, // 0x307c HIRAGANA LETTER BO
94 : 0x304a, // 0x307d HIRAGANA LETTER PO
95 : 0x3042, // 0x307e HIRAGANA LETTER MA
96 : 0x3044, // 0x307f HIRAGANA LETTER MI
97 : 0x3046, // 0x3080 HIRAGANA LETTER MU
98 : 0x3048, // 0x3081 HIRAGANA LETTER ME
99 : 0x304a, // 0x3082 HIRAGANA LETTER MO
100 : 0x3041, // 0x3083 HIRAGANA LETTER SMALL YA
101 : 0x3042, // 0x3084 HIRAGANA LETTER YA
102 : 0x3045, // 0x3085 HIRAGANA LETTER SMALL YU
103 : 0x3046, // 0x3086 HIRAGANA LETTER YU
104 : 0x3049, // 0x3087 HIRAGANA LETTER SMALL YO
105 : 0x304a, // 0x3088 HIRAGANA LETTER YO
106 : 0x3042, // 0x3089 HIRAGANA LETTER RA
107 : 0x3044, // 0x308a HIRAGANA LETTER RI
108 : 0x3046, // 0x308b HIRAGANA LETTER RU
109 : 0x3048, // 0x308c HIRAGANA LETTER RE
110 : 0x304a, // 0x308d HIRAGANA LETTER RO
111 : 0x3041, // 0x308e HIRAGANA LETTER SMALL WA
112 : 0x3042, // 0x308f HIRAGANA LETTER WA
113 : 0x3044, // 0x3090 HIRAGANA LETTER WI
114 : 0x3048, // 0x3091 HIRAGANA LETTER WE
115 : 0x304a, // 0x3092 HIRAGANA LETTER WO
116 : 0x0000, // 0x3093 HIRAGANA LETTER N
117 : 0x3046, // 0x3094 HIRAGANA LETTER VU
118 : 0x0000, // 0x3095
119 : 0x0000, // 0x3096
120 : 0x0000, // 0x3097
121 : 0x0000, // 0x3098
122 : 0x0000, // 0x3099 COMBINING KATAKANA-HIRAGANA VOICED SOUND MARK
123 : 0x0000, // 0x309a COMBINING KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
124 : 0x0000, // 0x309b KATAKANA-HIRAGANA VOICED SOUND MARK
125 : 0x0000, // 0x309c KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
126 : 0x0000, // 0x309d HIRAGANA ITERATION MARK
127 : 0x0000, // 0x309e HIRAGANA VOICED ITERATION MARK
128 : 0x0000, // 0x309f
129 : 0x0000, // 0x30a0
130 : 0x30a1, // 0x30a1 KATAKANA LETTER SMALL A
131 : 0x30a2, // 0x30a2 KATAKANA LETTER A
132 : 0x30a3, // 0x30a3 KATAKANA LETTER SMALL I
133 : 0x30a4, // 0x30a4 KATAKANA LETTER I
134 : 0x30a5, // 0x30a5 KATAKANA LETTER SMALL U
135 : 0x30a6, // 0x30a6 KATAKANA LETTER U
136 : 0x30a7, // 0x30a7 KATAKANA LETTER SMALL E
137 : 0x30a8, // 0x30a8 KATAKANA LETTER E
138 : 0x30a9, // 0x30a9 KATAKANA LETTER SMALL O
139 : 0x30aa, // 0x30aa KATAKANA LETTER O
140 : 0x30a2, // 0x30ab KATAKANA LETTER KA
141 : 0x30a2, // 0x30ac KATAKANA LETTER GA
142 : 0x30a4, // 0x30ad KATAKANA LETTER KI
143 : 0x30a4, // 0x30ae KATAKANA LETTER GI
144 : 0x30a6, // 0x30af KATAKANA LETTER KU
145 : 0x30a6, // 0x30b0 KATAKANA LETTER GU
146 : 0x30a8, // 0x30b1 KATAKANA LETTER KE
147 : 0x30a8, // 0x30b2 KATAKANA LETTER GE
148 : 0x30aa, // 0x30b3 KATAKANA LETTER KO
149 : 0x30aa, // 0x30b4 KATAKANA LETTER GO
150 : 0x30a2, // 0x30b5 KATAKANA LETTER SA
151 : 0x30a2, // 0x30b6 KATAKANA LETTER ZA
152 : 0x30a4, // 0x30b7 KATAKANA LETTER SI
153 : 0x30a4, // 0x30b8 KATAKANA LETTER ZI
154 : 0x30a6, // 0x30b9 KATAKANA LETTER SU
155 : 0x30a6, // 0x30ba KATAKANA LETTER ZU
156 : 0x30a8, // 0x30bb KATAKANA LETTER SE
157 : 0x30a8, // 0x30bc KATAKANA LETTER ZE
158 : 0x30aa, // 0x30bd KATAKANA LETTER SO
159 : 0x30aa, // 0x30be KATAKANA LETTER ZO
160 : 0x30a2, // 0x30bf KATAKANA LETTER TA
161 : 0x30a2, // 0x30c0 KATAKANA LETTER DA
162 : 0x30a4, // 0x30c1 KATAKANA LETTER TI
163 : 0x30a4, // 0x30c2 KATAKANA LETTER DI
164 : 0x30a5, // 0x30c3 KATAKANA LETTER SMALL TU
165 : 0x30a6, // 0x30c4 KATAKANA LETTER TU
166 : 0x30a6, // 0x30c5 KATAKANA LETTER DU
167 : 0x30a8, // 0x30c6 KATAKANA LETTER TE
168 : 0x30a8, // 0x30c7 KATAKANA LETTER DE
169 : 0x30aa, // 0x30c8 KATAKANA LETTER TO
170 : 0x30aa, // 0x30c9 KATAKANA LETTER DO
171 : 0x30a2, // 0x30ca KATAKANA LETTER NA
172 : 0x30a4, // 0x30cb KATAKANA LETTER NI
173 : 0x30a6, // 0x30cc KATAKANA LETTER NU
174 : 0x30a8, // 0x30cd KATAKANA LETTER NE
175 : 0x30aa, // 0x30ce KATAKANA LETTER NO
176 : 0x30a2, // 0x30cf KATAKANA LETTER HA
177 : 0x30a2, // 0x30d0 KATAKANA LETTER BA
178 : 0x30a2, // 0x30d1 KATAKANA LETTER PA
179 : 0x30a4, // 0x30d2 KATAKANA LETTER HI
180 : 0x30a4, // 0x30d3 KATAKANA LETTER BI
181 : 0x30a4, // 0x30d4 KATAKANA LETTER PI
182 : 0x30a6, // 0x30d5 KATAKANA LETTER HU
183 : 0x30a6, // 0x30d6 KATAKANA LETTER BU
184 : 0x30a6, // 0x30d7 KATAKANA LETTER PU
185 : 0x30a8, // 0x30d8 KATAKANA LETTER HE
186 : 0x30a8, // 0x30d9 KATAKANA LETTER BE
187 : 0x30a8, // 0x30da KATAKANA LETTER PE
188 : 0x30aa, // 0x30db KATAKANA LETTER HO
189 : 0x30aa, // 0x30dc KATAKANA LETTER BO
190 : 0x30aa, // 0x30dd KATAKANA LETTER PO
191 : 0x30a2, // 0x30de KATAKANA LETTER MA
192 : 0x30a4, // 0x30df KATAKANA LETTER MI
193 : 0x30a6, // 0x30e0 KATAKANA LETTER MU
194 : 0x30a8, // 0x30e1 KATAKANA LETTER ME
195 : 0x30aa, // 0x30e2 KATAKANA LETTER MO
196 : 0x30a1, // 0x30e3 KATAKANA LETTER SMALL YA
197 : 0x30a2, // 0x30e4 KATAKANA LETTER YA
198 : 0x30a5, // 0x30e5 KATAKANA LETTER SMALL YU
199 : 0x30a6, // 0x30e6 KATAKANA LETTER YU
200 : 0x30a9, // 0x30e7 KATAKANA LETTER SMALL YO
201 : 0x30aa, // 0x30e8 KATAKANA LETTER YO
202 : 0x30a2, // 0x30e9 KATAKANA LETTER RA
203 : 0x30a4, // 0x30ea KATAKANA LETTER RI
204 : 0x30a6, // 0x30eb KATAKANA LETTER RU
205 : 0x30a8, // 0x30ec KATAKANA LETTER RE
206 : 0x30aa, // 0x30ed KATAKANA LETTER RO
207 : 0x30a1, // 0x30ee KATAKANA LETTER SMALL WA
208 : 0x30a2, // 0x30ef KATAKANA LETTER WA
209 : 0x30a4, // 0x30f0 KATAKANA LETTER WI
210 : 0x30a8, // 0x30f1 KATAKANA LETTER WE
211 : 0x30aa, // 0x30f2 KATAKANA LETTER WO
212 : 0x0000, // 0x30f3 KATAKANA LETTER N
213 : 0x30a6, // 0x30f4 KATAKANA LETTER VU
214 : 0x30a1, // 0x30f5 KATAKANA LETTER SMALL KA
215 : 0x30a7, // 0x30f6 KATAKANA LETTER SMALL KE
216 : 0x30a2, // 0x30f7 KATAKANA LETTER VA
217 : 0x30a4, // 0x30f8 KATAKANA LETTER VI
218 : 0x30a8, // 0x30f9 KATAKANA LETTER VE
219 : 0x30aa // 0x30fa KATAKANA LETTER VO
220 : // 0x0000, // 0x30fb KATAKANA MIDDLE DOT
221 : // 0x0000, // 0x30fc KATAKANA-HIRAGANA PROLONGED SOUND MARK
222 : // 0x0000, // 0x30fd KATAKANA ITERATION MARK
223 : // 0x0000, // 0x30fe KATAKANA VOICED ITERATION MARK
224 : // 0x0000 // 0x30ff
225 : };
226 :
227 : static const sal_Unicode table_halfwidth[] = {
228 : // 0x0000, // 0xff61 HALFWIDTH IDEOGRAPHIC FULL STOP
229 : // 0x0000, // 0xff62 HALFWIDTH LEFT CORNER BRACKET
230 : // 0x0000, // 0xff63 HALFWIDTH RIGHT CORNER BRACKET
231 : // 0x0000, // 0xff64 HALFWIDTH IDEOGRAPHIC COMMA
232 : // 0x0000, // 0xff65 HALFWIDTH KATAKANA MIDDLE DOT
233 : 0xff75, // 0xff66 HALFWIDTH KATAKANA LETTER WO
234 : 0xff67, // 0xff67 HALFWIDTH KATAKANA LETTER SMALL A
235 : 0xff68, // 0xff68 HALFWIDTH KATAKANA LETTER SMALL I
236 : 0xff69, // 0xff69 HALFWIDTH KATAKANA LETTER SMALL U
237 : 0xff6a, // 0xff6a HALFWIDTH KATAKANA LETTER SMALL E
238 : 0xff6b, // 0xff6b HALFWIDTH KATAKANA LETTER SMALL O
239 : 0xff67, // 0xff6c HALFWIDTH KATAKANA LETTER SMALL YA
240 : 0xff69, // 0xff6d HALFWIDTH KATAKANA LETTER SMALL YU
241 : 0xff6b, // 0xff6e HALFWIDTH KATAKANA LETTER SMALL YO
242 : 0xff69, // 0xff6f HALFWIDTH KATAKANA LETTER SMALL TU
243 : 0x0000, // 0xff70 HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
244 : 0xff71, // 0xff71 HALFWIDTH KATAKANA LETTER A
245 : 0xff72, // 0xff72 HALFWIDTH KATAKANA LETTER I
246 : 0xff73, // 0xff73 HALFWIDTH KATAKANA LETTER U
247 : 0xff74, // 0xff74 HALFWIDTH KATAKANA LETTER E
248 : 0xff75, // 0xff75 HALFWIDTH KATAKANA LETTER O
249 : 0xff71, // 0xff76 HALFWIDTH KATAKANA LETTER KA
250 : 0xff72, // 0xff77 HALFWIDTH KATAKANA LETTER KI
251 : 0xff73, // 0xff78 HALFWIDTH KATAKANA LETTER KU
252 : 0xff74, // 0xff79 HALFWIDTH KATAKANA LETTER KE
253 : 0xff75, // 0xff7a HALFWIDTH KATAKANA LETTER KO
254 : 0xff71, // 0xff7b HALFWIDTH KATAKANA LETTER SA
255 : 0xff72, // 0xff7c HALFWIDTH KATAKANA LETTER SI
256 : 0xff73, // 0xff7d HALFWIDTH KATAKANA LETTER SU
257 : 0xff74, // 0xff7e HALFWIDTH KATAKANA LETTER SE
258 : 0xff75, // 0xff7f HALFWIDTH KATAKANA LETTER SO
259 : 0xff71, // 0xff80 HALFWIDTH KATAKANA LETTER TA
260 : 0xff72, // 0xff81 HALFWIDTH KATAKANA LETTER TI
261 : 0xff73, // 0xff82 HALFWIDTH KATAKANA LETTER TU
262 : 0xff74, // 0xff83 HALFWIDTH KATAKANA LETTER TE
263 : 0xff75, // 0xff84 HALFWIDTH KATAKANA LETTER TO
264 : 0xff71, // 0xff85 HALFWIDTH KATAKANA LETTER NA
265 : 0xff72, // 0xff86 HALFWIDTH KATAKANA LETTER NI
266 : 0xff73, // 0xff87 HALFWIDTH KATAKANA LETTER NU
267 : 0xff74, // 0xff88 HALFWIDTH KATAKANA LETTER NE
268 : 0xff75, // 0xff89 HALFWIDTH KATAKANA LETTER NO
269 : 0xff71, // 0xff8a HALFWIDTH KATAKANA LETTER HA
270 : 0xff72, // 0xff8b HALFWIDTH KATAKANA LETTER HI
271 : 0xff73, // 0xff8c HALFWIDTH KATAKANA LETTER HU
272 : 0xff74, // 0xff8d HALFWIDTH KATAKANA LETTER HE
273 : 0xff75, // 0xff8e HALFWIDTH KATAKANA LETTER HO
274 : 0xff71, // 0xff8f HALFWIDTH KATAKANA LETTER MA
275 : 0xff72, // 0xff90 HALFWIDTH KATAKANA LETTER MI
276 : 0xff73, // 0xff91 HALFWIDTH KATAKANA LETTER MU
277 : 0xff74, // 0xff92 HALFWIDTH KATAKANA LETTER ME
278 : 0xff75, // 0xff93 HALFWIDTH KATAKANA LETTER MO
279 : 0xff71, // 0xff94 HALFWIDTH KATAKANA LETTER YA
280 : 0xff73, // 0xff95 HALFWIDTH KATAKANA LETTER YU
281 : 0xff75, // 0xff96 HALFWIDTH KATAKANA LETTER YO
282 : 0xff71, // 0xff97 HALFWIDTH KATAKANA LETTER RA
283 : 0xff72, // 0xff98 HALFWIDTH KATAKANA LETTER RI
284 : 0xff73, // 0xff99 HALFWIDTH KATAKANA LETTER RU
285 : 0xff74, // 0xff9a HALFWIDTH KATAKANA LETTER RE
286 : 0xff75, // 0xff9b HALFWIDTH KATAKANA LETTER RO
287 : 0xff71 // 0xff9c HALFWIDTH KATAKANA LETTER WA
288 : // 0x0000, // 0xff9d HALFWIDTH KATAKANA LETTER N
289 : // 0x0000, // 0xff9e HALFWIDTH KATAKANA VOICED SOUND MARK
290 : // 0x0000 // 0xff9f HALFWIDTH KATAKANA SEMI-VOICED SOUND MARK
291 : };
292 :
293 :
294 : OUString SAL_CALL
295 0 : ignoreProlongedSoundMark_ja_JP::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount, Sequence< sal_Int32 >& offset )
296 : throw(RuntimeException, std::exception)
297 : {
298 : // Create a string buffer which can hold nCount + 1 characters.
299 : // The reference count is 1 now.
300 0 : rtl_uString * newStr = rtl_uString_alloc(nCount);
301 0 : sal_Unicode * dst = newStr->buffer;
302 0 : const sal_Unicode * src = inStr.getStr() + startPos;
303 :
304 0 : sal_Int32 *p = 0;
305 0 : sal_Int32 position = 0;
306 :
307 0 : if (useOffset) {
308 : // Allocate nCount length to offset argument.
309 0 : offset.realloc( nCount );
310 0 : p = offset.getArray();
311 0 : position = startPos;
312 : }
313 :
314 :
315 0 : sal_Unicode previousChar = *src ++;
316 : sal_Unicode currentChar;
317 :
318 : // Conversion
319 0 : while (-- nCount > 0) {
320 0 : currentChar = *src ++;
321 :
322 0 : if (currentChar == 0x30fc || // KATAKANA-HIRAGANA PROLONGED SOUND MARK
323 : currentChar == 0xff70) { // HALFWIDTH KATAKANA-HIRAGANA PROLONGED SOUND MARK
324 :
325 0 : if (0x3041 <= previousChar && previousChar <= 0x30fa) {
326 0 : currentChar = table_normalwidth[ previousChar - 0x3041 ];
327 : }
328 0 : else if (0xff66 <= previousChar && previousChar <= 0xff9c) {
329 0 : currentChar = table_halfwidth[ previousChar - 0xff66 ];
330 : }
331 : }
332 :
333 0 : if (useOffset)
334 0 : *p ++ = position ++;
335 0 : *dst ++ = previousChar;
336 0 : previousChar = currentChar;
337 : }
338 :
339 0 : if (nCount == 0) {
340 0 : if (useOffset)
341 0 : *p = position;
342 0 : *dst ++ = previousChar;
343 : }
344 :
345 0 : *dst = (sal_Unicode) 0;
346 :
347 0 : newStr->length = sal_Int32(dst - newStr->buffer);
348 0 : if (useOffset)
349 0 : offset.realloc(newStr->length);
350 0 : return OUString(newStr, SAL_NO_ACQUIRE); // take ownership
351 :
352 : }
353 :
354 : } } } }
355 :
356 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|