Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
#include <assert.h>
#include <vector>
#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
#include <comphelper/string.hxx>
#include <boost/scoped_array.hpp>
30 :
31 : using namespace com::sun::star::lang;
32 : using namespace com::sun::star::i18n;
33 : using namespace com::sun::star::linguistic2;
34 : using namespace com::sun::star::uno;
35 :
36 :
37 : namespace com { namespace sun { namespace star { namespace i18n {
38 :
39 2 : TextConversion_zh::TextConversion_zh( const Reference < XComponentContext >& xContext )
40 2 : : TextConversionService("com.sun.star.i18n.TextConversion_zh")
41 : {
42 2 : xCDL = ConversionDictionaryList::create(xContext);
43 2 : }
44 :
45 4 : sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
46 : {
47 4 : if (Data && Index) {
48 4 : sal_Unicode address = Index[ch>>8];
49 4 : if (address != 0xFFFF)
50 4 : address = Data[address + (ch & 0xFF)];
51 4 : return (address != 0xFFFF) ? address : ch;
52 : } else {
53 0 : return ch;
54 : }
55 : }
56 :
#ifdef DISABLE_DYNLOADING

// With dynamic loading disabled the table accessors are called directly
// instead of being looked up by symbol name, so they are declared here.
extern "C" {

// Character tables: data/index pairs consumed by getOneCharConversion().
const sal_Unicode* getSTC_CharData_T2S();
const sal_uInt16*  getSTC_CharIndex_T2S();

const sal_Unicode* getSTC_CharData_S2V();
const sal_uInt16*  getSTC_CharIndex_S2V();

const sal_Unicode* getSTC_CharData_S2T();
const sal_uInt16*  getSTC_CharIndex_S2T();

// Word table shared by both directions; the argument receives a length value.
const sal_Unicode* getSTC_WordData(sal_Int32&);

// Per-direction word index (the argument receives the maximum word length
// used by getWordConversion()) and entry tables.
const sal_uInt16* getSTC_WordIndex_T2S(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_T2S();

const sal_uInt16* getSTC_WordIndex_S2T(sal_Int32&);
const sal_uInt16* getSTC_WordEntry_S2T();

}

#endif
78 :
79 : OUString SAL_CALL
80 4 : TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
81 : {
82 : const sal_Unicode *Data;
83 : const sal_uInt16 *Index;
84 :
85 : #ifndef DISABLE_DYNLOADING
86 4 : if (toSChinese) {
87 2 : Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
88 2 : Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
89 2 : } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
90 2 : Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
91 2 : Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
92 : } else {
93 0 : Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
94 0 : Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
95 : }
96 : #else
97 : if (toSChinese) {
98 : Data = getSTC_CharData_T2S();
99 : Index = getSTC_CharIndex_T2S();
100 : } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
101 : Data = getSTC_CharData_S2V();
102 : Index = getSTC_CharIndex_S2V();
103 : } else {
104 : Data = getSTC_CharData_S2T();
105 : Index = getSTC_CharIndex_S2T();
106 : }
107 : #endif
108 :
109 4 : rtl_uString * newStr = rtl_uString_alloc(nLength);
110 8 : for (sal_Int32 i = 0; i < nLength; i++)
111 : newStr->buffer[i] =
112 4 : getOneCharConversion(aText[nStartPos+i], Data, Index);
113 4 : return OUString(newStr, SAL_NO_ACQUIRE); //take ownership
114 : }
115 :
116 : OUString SAL_CALL
117 0 : TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
118 : {
119 0 : sal_Int32 dictLen = 0;
120 0 : sal_Int32 maxLen = 0;
121 : const sal_uInt16 *index;
122 : const sal_uInt16 *entry;
123 : const sal_Unicode *charData;
124 : const sal_uInt16 *charIndex;
125 0 : bool one2one=true;
126 :
127 : #ifndef DISABLE_DYNLOADING
128 0 : const sal_Unicode *wordData = reinterpret_cast<const sal_Unicode* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordData"))(dictLen);
129 0 : if (toSChinese) {
130 0 : index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
131 0 : entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_T2S"))();
132 0 : charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
133 0 : charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
134 : } else {
135 0 : index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
136 0 : entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_S2T"))();
137 0 : if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
138 0 : charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
139 0 : charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
140 : } else {
141 0 : charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
142 0 : charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
143 : }
144 : }
145 : #else
146 : const sal_Unicode *wordData = getSTC_WordData(dictLen);
147 : if (toSChinese) {
148 : index = getSTC_WordIndex_T2S(maxLen);
149 : entry = getSTC_WordEntry_T2S();
150 : charData = getSTC_CharData_T2S();
151 : charIndex = getSTC_CharIndex_T2S();
152 : } else {
153 : index = getSTC_WordIndex_S2T(maxLen);
154 : entry = getSTC_WordEntry_S2T();
155 : if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
156 : charData = getSTC_CharData_S2V();
157 : charIndex = getSTC_CharIndex_S2V();
158 : } else {
159 : charData = getSTC_CharData_S2T();
160 : charIndex = getSTC_CharIndex_S2T();
161 : }
162 : }
163 : #endif
164 :
165 0 : if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
166 0 : return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
167 :
168 0 : boost::scoped_array<sal_Unicode> newStr(new sal_Unicode[nLength * 2 + 1]);
169 0 : sal_Int32 currPos = 0, count = 0;
170 0 : while (currPos < nLength) {
171 0 : sal_Int32 len = nLength - currPos;
172 0 : bool found = false;
173 0 : if (len > maxLen)
174 0 : len = maxLen;
175 0 : for (; len > 0 && ! found; len--) {
176 0 : OUString word = aText.copy(nStartPos + currPos, len);
177 0 : sal_Int32 current = 0;
178 : // user dictionary
179 0 : if (xCDL.is()) {
180 0 : Sequence < OUString > conversions;
181 : try {
182 0 : conversions = xCDL->queryConversions(word, 0, len,
183 : aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
184 : /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
185 0 : nConversionOptions);
186 : }
187 0 : catch ( NoSupportException & ) {
188 : // clear reference (when there is no user dictionary) in order
189 : // to not always have to catch this exception again
190 : // in further calls. (save time)
191 0 : xCDL = 0;
192 : }
193 0 : catch (...) {
194 : // catch all other exceptions to allow
195 : // querying the system dictionary in the next line
196 : }
197 0 : if (conversions.getLength() > 0) {
198 0 : if (offset.getLength() > 0) {
199 0 : if (word.getLength() != conversions[0].getLength())
200 0 : one2one=false;
201 0 : while (current < conversions[0].getLength()) {
202 0 : offset[count] = nStartPos + currPos + (current *
203 0 : word.getLength() / conversions[0].getLength());
204 0 : newStr[count++] = conversions[0][current++];
205 : }
206 : // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
207 : } else {
208 0 : while (current < conversions[0].getLength())
209 0 : newStr[count++] = conversions[0][current++];
210 : }
211 0 : currPos += word.getLength();
212 0 : found = true;
213 0 : }
214 : }
215 :
216 0 : if (!found && index[len+1] - index[len] > 0) {
217 0 : sal_Int32 bottom = (sal_Int32) index[len];
218 0 : sal_Int32 top = (sal_Int32) index[len+1] - 1;
219 :
220 0 : while (bottom <= top && !found) {
221 0 : current = (top + bottom) / 2;
222 0 : const sal_Int32 result = word.compareTo(wordData + entry[current]);
223 0 : if (result < 0)
224 0 : top = current - 1;
225 0 : else if (result > 0)
226 0 : bottom = current + 1;
227 : else {
228 0 : if (toSChinese) // Traditionary/Simplified conversion,
229 0 : for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
230 : else // Simplified/Traditionary conversion, forwards search for next word
231 0 : current = entry[current] + word.getLength() + 1;
232 0 : sal_Int32 start=current;
233 0 : if (offset.getLength() > 0) {
234 0 : if (word.getLength() != OUString(&wordData[current]).getLength())
235 0 : one2one=false;
236 0 : sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
237 0 : while (wordData[current]) {
238 0 : offset[count]=nStartPos + currPos + ((current-start) *
239 0 : word.getLength() / convertedLength);
240 0 : newStr[count++] = wordData[current++];
241 : }
242 : // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
243 : } else {
244 0 : while (wordData[current])
245 0 : newStr[count++] = wordData[current++];
246 : }
247 0 : currPos += word.getLength();
248 0 : found = true;
249 : }
250 : }
251 : }
252 0 : }
253 0 : if (!found) {
254 0 : if (offset.getLength() > 0)
255 0 : offset[count]=nStartPos+currPos;
256 0 : newStr[count++] =
257 0 : getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
258 0 : currPos++;
259 : }
260 : }
261 0 : if (offset.getLength() > 0)
262 0 : offset.realloc(one2one ? 0 : count);
263 0 : OUString aRet(newStr.get(), count);
264 0 : return aRet;
265 : }
266 :
267 : TextConversionResult SAL_CALL
268 2 : TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
269 : const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
270 : throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
271 : {
272 2 : TextConversionResult result;
273 :
274 2 : result.Candidates.realloc(1);
275 2 : result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
276 2 : result.Boundary.startPos = nStartPos;
277 2 : result.Boundary.endPos = nStartPos + nLength;
278 :
279 2 : return result;
280 : }
281 :
282 : OUString SAL_CALL
283 2 : TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
284 : const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
285 : throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
286 : {
287 2 : if (rLocale.Language == "zh" && ( nConversionType == TextConversionType::TO_SCHINESE || nConversionType == TextConversionType::TO_TCHINESE) ) {
288 :
289 2 : aLocale=rLocale;
290 2 : bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
291 :
292 2 : if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
293 : // char to char dictionary
294 2 : return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
295 : else {
296 0 : Sequence <sal_Int32> offset;
297 : // word to word dictionary
298 0 : return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
299 : }
300 : } else
301 0 : throw NoSupportException(); // Conversion type is not supported in this service.
302 : }
303 :
304 : OUString SAL_CALL
305 2 : TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
306 : const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
307 : throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
308 : {
309 2 : if (rLocale.Language == "zh" && ( nConversionType == TextConversionType::TO_SCHINESE || nConversionType == TextConversionType::TO_TCHINESE) ) {
310 :
311 2 : aLocale=rLocale;
312 2 : bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
313 :
314 2 : if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
315 2 : offset.realloc(0);
316 : // char to char dictionary
317 2 : return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
318 : } else {
319 0 : if (offset.getLength() < 2*nLength)
320 0 : offset.realloc(2*nLength);
321 : // word to word dictionary
322 0 : return getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
323 : }
324 : } else
325 0 : throw NoSupportException(); // Conversion type is not supported in this service.
326 : }
327 :
328 : sal_Bool SAL_CALL
329 0 : TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
330 : throw( RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
331 : {
332 0 : return sal_False;
333 : }
334 :
335 : } } } }
336 :
337 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|