Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <cclass_unicode.hxx>
21 : #include <com/sun/star/i18n/UnicodeScript.hpp>
22 : #include <com/sun/star/i18n/UnicodeType.hpp>
23 : #include <com/sun/star/i18n/KCharacterType.hpp>
24 : #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
25 : #include <unicode/uchar.h>
26 : #include <comphelper/string.hxx>
27 : #include <cppuhelper/exc_hlp.hxx>
28 : #include <cppuhelper/supportsservice.hxx>
29 : #include <breakiteratorImpl.hxx>
30 :
31 : using namespace ::com::sun::star::uno;
32 : using namespace ::com::sun::star::lang;
33 :
34 : namespace com { namespace sun { namespace star { namespace i18n {
35 :
36 : // class cclass_Unicode
37 : // ----------------------------------------------------;
38 :
39 39615 : cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) : m_xContext( rxContext ),
40 : pTable( NULL ),
41 : pStart( NULL ),
42 : pCont( NULL ),
43 : nStartTypes( 0 ),
44 : nContTypes( 0 ),
45 : eState( ssGetChar ),
46 : cGroupSep( ',' ),
47 39615 : cDecimalSep( '.' )
48 : {
49 39615 : trans = new Transliteration_casemapping();
50 39615 : }
51 :
52 118467 : cclass_Unicode::~cclass_Unicode() {
53 39489 : destroyParserTable();
54 39489 : delete trans;
55 78978 : }
56 :
57 :
58 : OUString SAL_CALL
59 1956520 : cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException, std::exception) {
60 1956520 : sal_Int32 len = Text.getLength();
61 1956520 : if (nPos >= len)
62 29839 : return OUString();
63 1926681 : if (nCount + nPos > len)
64 0 : nCount = len - nPos;
65 :
66 1926681 : trans->setMappingType(MappingTypeToUpper, rLocale);
67 1926681 : return trans->transliterateString2String(Text, nPos, nCount);
68 : }
69 :
70 : OUString SAL_CALL
71 11850221 : cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException, std::exception) {
72 11850221 : sal_Int32 len = Text.getLength();
73 11850221 : if (nPos >= len)
74 0 : return OUString();
75 11850221 : if (nCount + nPos > len)
76 0 : nCount = len - nPos;
77 :
78 11850221 : trans->setMappingType(MappingTypeToLower, rLocale);
79 11850221 : return trans->transliterateString2String(Text, nPos, nCount);
80 : }
81 :
82 : OUString SAL_CALL
83 6 : cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException, std::exception) {
84 : try
85 : {
86 6 : sal_Int32 len = Text.getLength();
87 6 : if (nPos >= len)
88 0 : return OUString();
89 6 : if (nCount + nPos > len)
90 0 : nCount = len - nPos;
91 :
92 6 : trans->setMappingType(MappingTypeToTitle, rLocale);
93 6 : rtl_uString* pStr = rtl_uString_alloc(nCount);
94 6 : sal_Unicode* out = pStr->buffer;
95 6 : BreakIteratorImpl brk(m_xContext);
96 : Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
97 6 : WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
98 30 : for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
99 24 : if (i >= bdy.endPos)
100 : bdy = brk.nextWord(Text, bdy.endPos, rLocale,
101 3 : WordType::ANYWORD_IGNOREWHITESPACES);
102 24 : *out = (i == bdy.startPos) ?
103 24 : trans->transliterateChar2Char(Text[i]) : Text[i];
104 : }
105 6 : *out = 0;
106 6 : return OUString( pStr, SAL_NO_ACQUIRE );
107 : }
108 0 : catch (const RuntimeException&)
109 : {
110 0 : throw;
111 : }
112 0 : catch (const Exception& e)
113 : {
114 0 : uno::Any a(cppu::getCaughtException());
115 : throw lang::WrappedTargetRuntimeException(
116 0 : "wrapped " + a.getValueTypeName() + ": " + e.Message,
117 0 : uno::Reference<uno::XInterface>(), a);
118 : }
119 : }
120 :
121 : sal_Int16 SAL_CALL
122 29253 : cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException, std::exception) {
123 29253 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
124 27453 : return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
125 : }
126 :
127 : sal_Int16 SAL_CALL
128 17322 : cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException, std::exception) {
129 17322 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
130 17322 : return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
131 : }
132 :
133 :
134 : sal_Int16 SAL_CALL
135 18 : cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException, std::exception) {
136 18 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
137 : // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
138 : // while OO.o enum UnicideScript starts from 0.
139 : // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
140 18 : return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
141 : }
142 :
143 :
144 : sal_Int32 SAL_CALL
145 839196566 : cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
146 : using namespace ::com::sun::star::i18n::KCharacterType;
147 :
148 839196566 : sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
149 839196566 : switch ( u_charType(ch) ) {
150 : // Upper
151 : case U_UPPERCASE_LETTER :
152 114813938 : return UPPER|LETTER|PRINTABLE|BASE_FORM;
153 :
154 : // Lower
155 : case U_LOWERCASE_LETTER :
156 688556954 : return LOWER|LETTER|PRINTABLE|BASE_FORM;
157 :
158 : // Title
159 : case U_TITLECASE_LETTER :
160 0 : return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
161 :
162 : // Letter
163 : case U_MODIFIER_LETTER :
164 : case U_OTHER_LETTER :
165 277 : return LETTER|PRINTABLE|BASE_FORM;
166 :
167 : // Digit
168 : case U_DECIMAL_DIGIT_NUMBER:
169 : case U_LETTER_NUMBER:
170 : case U_OTHER_NUMBER:
171 10802170 : return DIGIT|PRINTABLE|BASE_FORM;
172 :
173 : // Base
174 : case U_NON_SPACING_MARK:
175 : case U_ENCLOSING_MARK:
176 : case U_COMBINING_SPACING_MARK:
177 84 : return BASE_FORM|PRINTABLE;
178 :
179 : // Print
180 : case U_SPACE_SEPARATOR:
181 :
182 : case U_DASH_PUNCTUATION:
183 : case U_INITIAL_PUNCTUATION:
184 : case U_FINAL_PUNCTUATION:
185 : case U_CONNECTOR_PUNCTUATION:
186 : case U_OTHER_PUNCTUATION:
187 :
188 : case U_MATH_SYMBOL:
189 : case U_CURRENCY_SYMBOL:
190 : case U_MODIFIER_SYMBOL:
191 : case U_OTHER_SYMBOL:
192 19339307 : return PRINTABLE;
193 :
194 : // Control
195 : case U_CONTROL_CHAR:
196 : case U_FORMAT_CHAR:
197 5683791 : return CONTROL;
198 :
199 : case U_LINE_SEPARATOR:
200 : case U_PARAGRAPH_SEPARATOR:
201 0 : return CONTROL|PRINTABLE;
202 :
203 : // for all others
204 : default:
205 45 : return U_GENERAL_OTHER_TYPES;
206 : }
207 : }
208 :
209 : sal_Int32 SAL_CALL
210 748118796 : cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception) {
211 748118796 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
212 748118796 : return getCharType(Text, &nPos, 0);
213 :
214 : }
215 :
216 : sal_Int32 SAL_CALL
217 91077762 : cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception) {
218 91077762 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
219 :
220 91077762 : sal_Int32 result = 0;
221 :
222 273233294 : while (nCount > 0 && nPos < Text.getLength())
223 : {
224 91077770 : sal_Int32 nOrigPos = nPos;
225 91077770 : result |= getCharType(Text, &nPos, 1);
226 91077770 : sal_Int32 nUtf16Units = nPos - nOrigPos;
227 91077770 : nCount -= nUtf16Units;
228 : }
229 :
230 91077762 : return result;
231 : }
232 :
233 18417 : ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
234 : const OUString& Text,
235 : sal_Int32 nPos,
236 : const Locale& rLocale,
237 : sal_Int32 startCharTokenType,
238 : const OUString& userDefinedCharactersStart,
239 : sal_Int32 contCharTokenType,
240 : const OUString& userDefinedCharactersCont )
241 : throw(RuntimeException, std::exception)
242 : {
243 18417 : ParseResult r;
244 18417 : if ( Text.getLength() <= nPos )
245 1800 : return r;
246 :
247 : setupParserTable( rLocale,
248 : startCharTokenType, userDefinedCharactersStart,
249 16617 : contCharTokenType, userDefinedCharactersCont );
250 16617 : parseText( r, Text, nPos );
251 :
252 16617 : return r;
253 : }
254 :
255 :
256 49133 : ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
257 : sal_Int32 nTokenType,
258 : const OUString& Text,
259 : sal_Int32 nPos,
260 : const Locale& rLocale,
261 : sal_Int32 startCharTokenType,
262 : const OUString& userDefinedCharactersStart,
263 : sal_Int32 contCharTokenType,
264 : const OUString& userDefinedCharactersCont )
265 : throw(RuntimeException, std::exception)
266 : {
267 49133 : ParseResult r;
268 49133 : if ( Text.getLength() <= nPos )
269 1801 : return r;
270 :
271 : setupParserTable( rLocale,
272 : startCharTokenType, userDefinedCharactersStart,
273 47332 : contCharTokenType, userDefinedCharactersCont );
274 47332 : parseText( r, Text, nPos, nTokenType );
275 :
276 47332 : return r;
277 : }
278 :
279 0 : OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException, std::exception )
280 : {
281 0 : return OUString("com.sun.star.i18n.CharacterClassification_Unicode");
282 : }
283 :
284 0 : sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
285 : {
286 0 : return cppu::supportsService(this, rServiceName);
287 : }
288 :
289 0 : Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException, std::exception )
290 : {
291 0 : Sequence< OUString > aRet(1);
292 0 : aRet[0] = "com.sun.star.i18n.CharacterClassification_Unicode";
293 0 : return aRet;
294 : }
295 :
296 : } } } }
297 :
298 : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
299 39615 : com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
300 : css::uno::XComponentContext *context,
301 : css::uno::Sequence<css::uno::Any> const &)
302 : {
303 39615 : return cppu::acquire(new css::i18n::cclass_Unicode(context));
304 : }
305 :
306 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|