Branch data Line data Source code
1 : : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : : /*
3 : : * This file is part of the LibreOffice project.
4 : : *
5 : : * This Source Code Form is subject to the terms of the Mozilla Public
6 : : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : : *
9 : : * This file incorporates work covered by the following license notice:
10 : : *
11 : : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : : * contributor license agreements. See the NOTICE file distributed
13 : : * with this work for additional information regarding copyright
14 : : * ownership. The ASF licenses this file to you under the Apache
15 : : * License, Version 2.0 (the "License"); you may not use this file
16 : : * except in compliance with the License. You may obtain a copy of
17 : : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : : */
19 : :
20 : :
21 : : #include <rtl/ustrbuf.hxx>
22 : : #include <i18nutil/casefolding.hxx>
23 : : #include <i18nutil/unicode.hxx>
24 : :
25 : : #include <comphelper/processfactory.hxx>
26 : : #include <comphelper/string.hxx>
27 : : #include <osl/diagnose.h>
28 : :
29 : : #include <string.h>
30 : :
31 : : #include "characterclassificationImpl.hxx"
32 : : #include "breakiteratorImpl.hxx"
33 : :
34 : : #define TRANSLITERATION_ALL
35 : : #include "transliteration_body.hxx"
36 : :
37 : : using namespace ::com::sun::star::uno;
38 : : using namespace ::com::sun::star::lang;
39 : : using namespace ::rtl;
40 : :
41 : : namespace com { namespace sun { namespace star { namespace i18n {
42 : :
43 : :
44 : 35043 : Transliteration_body::Transliteration_body()
45 : : {
46 : 35043 : nMappingType = 0;
47 : 35043 : transliterationName = "Transliteration_body";
48 : 35043 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
49 : 35043 : }
50 : :
51 : 0 : sal_Int16 SAL_CALL Transliteration_body::getType() throw(RuntimeException)
52 : : {
53 : 0 : return TransliterationType::ONE_TO_ONE;
54 : : }
55 : :
56 : 0 : sal_Bool SAL_CALL Transliteration_body::equals(
57 : : const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
58 : : const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
59 : : throw(RuntimeException)
60 : : {
61 [ # # ]: 0 : throw RuntimeException();
62 : : }
63 : :
64 : : Sequence< OUString > SAL_CALL
65 : 0 : Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
66 : : throw( RuntimeException)
67 : : {
68 : 0 : Sequence< OUString > ostr(2);
69 [ # # ]: 0 : ostr[0] = str1;
70 [ # # ]: 0 : ostr[1] = str2;
71 : 0 : return ostr;
72 : : }
73 : :
74 : :
75 : 0 : static sal_uInt8 lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType, sal_Unicode cChar )
76 : : {
77 : 0 : sal_uInt8 nRes = nMappingType;
78 : :
79 : : // take care of TOGGLE_CASE transliteration:
80 : : // nMappingType should not be a combination of flags, thuse we decide now
81 : : // which one to use.
82 [ # # ]: 0 : if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
83 : : {
84 : 0 : const sal_Int16 nType = unicode::getUnicodeType( cChar );
85 [ # # ]: 0 : if (nType & 0x02 /* lower case*/)
86 : 0 : nRes = MappingTypeLowerToUpper;
87 : : else
88 : : {
89 : : // should also work properly for non-upper characters like white spacs, numbers, ...
90 : 0 : nRes = MappingTypeUpperToLower;
91 : : }
92 : : }
93 : :
94 : 0 : return nRes;
95 : : }
96 : :
97 : :
98 : : OUString SAL_CALL
99 : 893190 : Transliteration_body::transliterate(
100 : : const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
101 : : Sequence< sal_Int32 >& offset)
102 : : throw(RuntimeException)
103 : : {
104 : :
105 : 893190 : const sal_Unicode *in = inStr.getStr() + startPos;
106 : :
107 : : // Two different blocks to eliminate the if(useOffset) condition inside the
108 : : // inner k loop. Yes, on massive use even such small things do count.
109 [ + + ]: 893190 : if ( useOffset )
110 : : {
111 : 705 : sal_Int32 nOffCount = 0, i;
112 [ + + ]: 28654 : for (i = 0; i < nCount; i++)
113 : : {
114 : : // take care of TOGGLE_CASE transliteration:
115 : 27949 : sal_uInt8 nTmpMappingType = nMappingType;
116 [ - + ]: 27949 : if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
117 : 0 : nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
118 : :
119 : 27949 : const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
120 : 27949 : nOffCount += map.nmap;
121 : : }
122 : 705 : rtl_uString* pStr = comphelper::string::rtl_uString_alloc(nOffCount);
123 : 705 : sal_Unicode* out = pStr->buffer;
124 : :
125 [ - + ]: 705 : if ( nOffCount != offset.getLength() )
126 : 0 : offset.realloc( nOffCount );
127 : :
128 : 705 : sal_Int32 j = 0;
129 : 705 : sal_Int32 * pArr = offset.getArray();
130 [ + + ]: 28654 : for (i = 0; i < nCount; i++)
131 : : {
132 : : // take care of TOGGLE_CASE transliteration:
133 : 27949 : sal_uInt8 nTmpMappingType = nMappingType;
134 [ - + ]: 27949 : if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
135 : 0 : nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
136 : :
137 : 27949 : const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
138 [ + + ]: 55898 : for (sal_Int32 k = 0; k < map.nmap; k++)
139 : : {
140 : 27949 : pArr[j] = i + startPos;
141 : 27949 : out[j++] = map.map[k];
142 : : }
143 : : }
144 : 705 : out[j] = 0;
145 : :
146 : 705 : return OUString( pStr, SAL_NO_ACQUIRE );
147 : : }
148 : : else
149 : : {
150 : : // In the simple case of no offset sequence used we can eliminate the
151 : : // first getValue() loop. We could also assume that most calls result
152 : : // in identical string lengths, thus using a preallocated
153 : : // OUStringBuffer could be an easy way to assemble the return string
154 : : // without too much hassle. However, for single characters the
155 : : // OUStringBuffer::append() method is quite expensive compared to a
156 : : // simple array operation, so it pays here to copy the final result
157 : : // instead.
158 : :
159 : : // Allocate the max possible buffer. Try to use stack instead of heap,
160 : : // which would have to be reallocated most times anyways.
161 : 892485 : const sal_Int32 nLocalBuf = 2048;
162 : 892485 : sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf, *pHeapBuf = NULL;
163 [ - + ]: 892485 : if ( nCount > nLocalBuf )
164 [ # # ]: 0 : out = pHeapBuf = new sal_Unicode[ nCount * NMAPPINGMAX ];
165 : :
166 : 892485 : sal_Int32 j = 0;
167 [ + + ]: 8340169 : for ( sal_Int32 i = 0; i < nCount; i++)
168 : : {
169 : : // take care of TOGGLE_CASE transliteration:
170 : 7447684 : sal_uInt8 nTmpMappingType = nMappingType;
171 [ - + ]: 7447684 : if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
172 [ # # ]: 0 : nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
173 : :
174 [ + - ]: 7447684 : const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
175 [ + + ]: 14895368 : for (sal_Int32 k = 0; k < map.nmap; k++)
176 : : {
177 : 7447684 : out[j++] = map.map[k];
178 : : }
179 : : }
180 : :
181 : 892485 : OUString aRet( out, j );
182 [ - + ]: 892485 : if ( pHeapBuf )
183 [ # # ]: 0 : delete [] pHeapBuf;
184 : 893190 : return aRet;
185 : : }
186 : : }
187 : :
188 : : OUString SAL_CALL
189 : 0 : Transliteration_body::transliterateChar2String( sal_Unicode inChar ) throw(RuntimeException)
190 : : {
191 : 0 : const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
192 : 0 : rtl_uString* pStr = comphelper::string::rtl_uString_alloc(map.nmap);
193 : 0 : sal_Unicode* out = pStr->buffer;
194 : : sal_Int32 i;
195 : :
196 [ # # ]: 0 : for (i = 0; i < map.nmap; i++)
197 : 0 : out[i] = map.map[i];
198 : 0 : out[i] = 0;
199 : :
200 : 0 : return OUString( pStr, SAL_NO_ACQUIRE );
201 : : }
202 : :
203 : : sal_Unicode SAL_CALL
204 : 9 : Transliteration_body::transliterateChar2Char( sal_Unicode inChar ) throw(MultipleCharsOutputException, RuntimeException)
205 : : {
206 : 9 : const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
207 [ - + ]: 9 : if (map.nmap > 1)
208 [ # # ]: 0 : throw MultipleCharsOutputException();
209 : 9 : return map.map[0];
210 : : }
211 : :
212 : : OUString SAL_CALL
213 : 0 : Transliteration_body::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
214 : : Sequence< sal_Int32 >& offset) throw(RuntimeException)
215 : : {
216 : 0 : return this->transliterate(inStr, startPos, nCount, offset);
217 : : }
218 : :
219 : 34872 : Transliteration_casemapping::Transliteration_casemapping()
220 : : {
221 : 34872 : nMappingType = 0;
222 : 34872 : transliterationName = "casemapping(generic)";
223 : 34872 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
224 : 34872 : }
225 : :
226 : : void SAL_CALL
227 : 892430 : Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType, const Locale& rLocale )
228 : : {
229 : 892430 : nMappingType = rMappingType;
230 : 892430 : aLocale = rLocale;
231 : 892430 : }
232 : :
233 : 0 : Transliteration_u2l::Transliteration_u2l()
234 : : {
235 : 0 : nMappingType = MappingTypeUpperToLower;
236 : 0 : transliterationName = "upper_to_lower(generic)";
237 : 0 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
238 : 0 : }
239 : :
240 : 6 : Transliteration_l2u::Transliteration_l2u()
241 : : {
242 : 6 : nMappingType = MappingTypeLowerToUpper;
243 : 6 : transliterationName = "lower_to_upper(generic)";
244 : 6 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
245 : 6 : }
246 : :
247 : 0 : Transliteration_togglecase::Transliteration_togglecase()
248 : : {
249 : : // usually nMappingType must NOT be a combiantion of different flages here,
250 : : // but we take care of that problem in Transliteration_body::transliterate above
251 : : // before that value is used. There we will decide which of both is to be used on
252 : : // a per character basis.
253 : 0 : nMappingType = MappingTypeLowerToUpper | MappingTypeUpperToLower;
254 : 0 : transliterationName = "toggle(generic)";
255 : 0 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
256 : 0 : }
257 : :
258 : 0 : Transliteration_titlecase::Transliteration_titlecase()
259 : : {
260 : 0 : nMappingType = MappingTypeToTitle;
261 : 0 : transliterationName = "title(generic)";
262 : 0 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
263 : 0 : }
264 : :
265 : 0 : static rtl::OUString transliterate_titlecase_Impl(
266 : : const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
267 : : const Locale &rLocale,
268 : : Sequence< sal_Int32 >& offset )
269 : : throw(RuntimeException)
270 : : {
271 : 0 : const OUString aText( inStr.copy( startPos, nCount ) );
272 : :
273 : 0 : OUString aRes;
274 [ # # ]: 0 : if (!aText.isEmpty())
275 : : {
276 [ # # ]: 0 : Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory();
277 [ # # ]: 0 : CharacterClassificationImpl aCharClassImpl( xMSF );
278 : :
279 : : // because aCharClassImpl.toTitle does not handle ligatures or ß but will raise
280 : : // an exception we need to handle the first chara manually...
281 : :
282 : : // we don't want to change surrogates by accident, thuse we use proper code point iteration
283 : 0 : sal_Int32 nPos = 0;
284 [ # # ]: 0 : sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos );
285 [ # # ]: 0 : OUString aResolvedLigature( &cFirstChar, 1 ); //lcl_ResolveLigature( cFirstChar ) );
286 : : // toUpper can be used to properly resolve ligatures and characters like ß
287 [ # # ]: 0 : aResolvedLigature = aCharClassImpl.toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
288 : : // since toTitle will leave all-uppercase text unchanged we first need to
289 : : // use toLower to bring possible 2nd and following charas in lowercase
290 [ # # ]: 0 : aResolvedLigature = aCharClassImpl.toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
291 : 0 : sal_Int32 nResolvedLen = aResolvedLigature.getLength();
292 : :
293 : : // now we can properly use toTitle to get the expected result for the resolved string.
294 : : // The rest of the text should just become lowercase.
295 [ # # ]: 0 : aRes = aCharClassImpl.toTitle( aResolvedLigature, 0, nResolvedLen, rLocale );
296 [ # # ]: 0 : aRes += aCharClassImpl.toLower( aText, 1, aText.getLength() - 1, rLocale );
297 [ # # ]: 0 : offset.realloc( aRes.getLength() );
298 : :
299 [ # # ]: 0 : sal_Int32 *pOffset = offset.getArray();
300 : 0 : sal_Int32 nLen = offset.getLength();
301 [ # # ]: 0 : for (sal_Int32 i = 0; i < nLen; ++i)
302 : : {
303 : 0 : sal_Int32 nIdx = 0;
304 [ # # ]: 0 : if (i >= nResolvedLen)
305 : 0 : nIdx = i - nResolvedLen + 1;
306 : 0 : pOffset[i] = nIdx;
307 [ # # ]: 0 : }
308 : : }
309 : : #if OSL_DEBUG_LEVEL > 1
310 : : const sal_Int32 *pCOffset = offset.getConstArray();
311 : : (void) pCOffset;
312 : : #endif
313 : :
314 : 0 : return aRes;
315 : : }
316 : :
317 : :
318 : : // this function expects to be called on a word-by-word basis,
319 : : // namely that startPos points to the first char of the word
320 : 0 : rtl::OUString SAL_CALL Transliteration_titlecase::transliterate(
321 : : const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
322 : : Sequence< sal_Int32 >& offset )
323 : : throw(RuntimeException)
324 : : {
325 : 0 : return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
326 : : }
327 : :
328 : :
329 : 0 : Transliteration_sentencecase::Transliteration_sentencecase()
330 : : {
331 : 0 : nMappingType = MappingTypeToTitle; // though only to be applied to the first word...
332 : 0 : transliterationName = "sentence(generic)";
333 : 0 : implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
334 : 0 : }
335 : :
336 : :
337 : : // this function expects to be called on a sentence-by-sentence basis,
338 : : // namely that startPos points to the first word (NOT first char!) in the sentence
339 : 0 : rtl::OUString SAL_CALL Transliteration_sentencecase::transliterate(
340 : : const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
341 : : Sequence< sal_Int32 >& offset )
342 : : throw(RuntimeException)
343 : : {
344 : 0 : return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
345 : : }
346 : :
347 : :
348 : : } } } }
349 : :
350 : : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|