Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <cclass_unicode.hxx>
21 : #include <com/sun/star/i18n/UnicodeScript.hpp>
22 : #include <com/sun/star/i18n/UnicodeType.hpp>
23 : #include <com/sun/star/i18n/KCharacterType.hpp>
24 : #include <unicode/uchar.h>
25 : #include <comphelper/string.hxx>
26 : #include <cppuhelper/supportsservice.hxx>
27 : #include <breakiteratorImpl.hxx>
28 :
29 : using namespace ::com::sun::star::uno;
30 : using namespace ::com::sun::star::lang;
31 :
32 : namespace com { namespace sun { namespace star { namespace i18n {
33 :
34 : // class cclass_Unicode
35 : // ----------------------------------------------------;
36 :
37 60441 : cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) : m_xContext( rxContext ),
38 : pTable( NULL ),
39 : pStart( NULL ),
40 : pCont( NULL ),
41 : nStartTypes( 0 ),
42 : nContTypes( 0 ),
43 : eState( ssGetChar ),
44 : cGroupSep( ',' ),
45 60441 : cDecimalSep( '.' )
46 : {
47 60441 : trans = new Transliteration_casemapping();
48 60441 : }
49 :
50 180732 : cclass_Unicode::~cclass_Unicode() {
51 60244 : destroyParserTable();
52 60244 : delete trans;
53 120488 : }
54 :
55 :
56 : OUString SAL_CALL
57 3056107 : cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException, std::exception) {
58 3056107 : sal_Int32 len = Text.getLength();
59 3056107 : if (nPos >= len)
60 10648 : return OUString();
61 3045459 : if (nCount + nPos > len)
62 0 : nCount = len - nPos;
63 :
64 3045459 : trans->setMappingType(MappingTypeToUpper, rLocale);
65 3045459 : return trans->transliterateString2String(Text, nPos, nCount);
66 : }
67 :
68 : OUString SAL_CALL
69 32157 : cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException, std::exception) {
70 32157 : sal_Int32 len = Text.getLength();
71 32157 : if (nPos >= len)
72 0 : return OUString();
73 32157 : if (nCount + nPos > len)
74 0 : nCount = len - nPos;
75 :
76 32157 : trans->setMappingType(MappingTypeToLower, rLocale);
77 32157 : return trans->transliterateString2String(Text, nPos, nCount);
78 : }
79 :
80 : OUString SAL_CALL
81 10 : cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException, std::exception) {
82 10 : sal_Int32 len = Text.getLength();
83 10 : if (nPos >= len)
84 0 : return OUString();
85 10 : if (nCount + nPos > len)
86 0 : nCount = len - nPos;
87 :
88 10 : trans->setMappingType(MappingTypeToTitle, rLocale);
89 10 : rtl_uString* pStr = rtl_uString_alloc(nCount);
90 10 : sal_Unicode* out = pStr->buffer;
91 10 : BreakIteratorImpl brk(m_xContext);
92 : Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
93 10 : WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
94 56 : for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
95 46 : if (i >= bdy.endPos)
96 : bdy = brk.nextWord(Text, bdy.endPos, rLocale,
97 6 : WordType::ANYWORD_IGNOREWHITESPACES);
98 46 : *out = (i == bdy.startPos) ?
99 46 : trans->transliterateChar2Char(Text[i]) : Text[i];
100 : }
101 10 : *out = 0;
102 10 : return OUString( pStr, SAL_NO_ACQUIRE );
103 : }
104 :
105 : sal_Int16 SAL_CALL
106 57528 : cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException, std::exception) {
107 57528 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
108 54010 : return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
109 : }
110 :
111 : sal_Int16 SAL_CALL
112 19727 : cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException, std::exception) {
113 19727 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
114 19727 : return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
115 : }
116 :
117 :
118 : sal_Int16 SAL_CALL
119 36 : cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException, std::exception) {
120 36 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
121 : // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
122 : // while OO.o enum UnicideScript starts from 0.
123 : // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
124 36 : return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
125 : }
126 :
127 :
128 : sal_Int32 SAL_CALL
129 325134 : cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
130 : using namespace ::com::sun::star::i18n::KCharacterType;
131 :
132 325134 : sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
133 325134 : switch ( u_charType(ch) ) {
134 : // Upper
135 : case U_UPPERCASE_LETTER :
136 70260 : return UPPER|LETTER|PRINTABLE|BASE_FORM;
137 :
138 : // Lower
139 : case U_LOWERCASE_LETTER :
140 128485 : return LOWER|LETTER|PRINTABLE|BASE_FORM;
141 :
142 : // Title
143 : case U_TITLECASE_LETTER :
144 0 : return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
145 :
146 : // Letter
147 : case U_MODIFIER_LETTER :
148 : case U_OTHER_LETTER :
149 460 : return LETTER|PRINTABLE|BASE_FORM;
150 :
151 : // Digit
152 : case U_DECIMAL_DIGIT_NUMBER:
153 : case U_LETTER_NUMBER:
154 : case U_OTHER_NUMBER:
155 117541 : return DIGIT|PRINTABLE|BASE_FORM;
156 :
157 : // Base
158 : case U_NON_SPACING_MARK:
159 : case U_ENCLOSING_MARK:
160 : case U_COMBINING_SPACING_MARK:
161 120 : return BASE_FORM|PRINTABLE;
162 :
163 : // Print
164 : case U_SPACE_SEPARATOR:
165 :
166 : case U_DASH_PUNCTUATION:
167 : case U_INITIAL_PUNCTUATION:
168 : case U_FINAL_PUNCTUATION:
169 : case U_CONNECTOR_PUNCTUATION:
170 : case U_OTHER_PUNCTUATION:
171 :
172 : case U_MATH_SYMBOL:
173 : case U_CURRENCY_SYMBOL:
174 : case U_MODIFIER_SYMBOL:
175 : case U_OTHER_SYMBOL:
176 7378 : return PRINTABLE;
177 :
178 : // Control
179 : case U_CONTROL_CHAR:
180 : case U_FORMAT_CHAR:
181 828 : return CONTROL;
182 :
183 : case U_LINE_SEPARATOR:
184 : case U_PARAGRAPH_SEPARATOR:
185 0 : return CONTROL|PRINTABLE;
186 :
187 : // for all others
188 : default:
189 62 : return U_GENERAL_OTHER_TYPES;
190 : }
191 : }
192 :
193 : sal_Int32 SAL_CALL
194 233449 : cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception) {
195 233449 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
196 233449 : return getCharType(Text, &nPos, 0);
197 :
198 : }
199 :
200 : sal_Int32 SAL_CALL
201 18824 : cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException, std::exception) {
202 18824 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
203 :
204 18824 : sal_Int32 result = 0;
205 :
206 129333 : while (nCount > 0 && nPos < Text.getLength())
207 : {
208 91685 : sal_Int32 nOrigPos = nPos;
209 91685 : result |= getCharType(Text, &nPos, 1);
210 91685 : sal_Int32 nUtf16Units = nPos - nOrigPos;
211 91685 : nCount -= nUtf16Units;
212 : }
213 :
214 18824 : return result;
215 : }
216 :
217 36168 : ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
218 : const OUString& Text,
219 : sal_Int32 nPos,
220 : const Locale& rLocale,
221 : sal_Int32 startCharTokenType,
222 : const OUString& userDefinedCharactersStart,
223 : sal_Int32 contCharTokenType,
224 : const OUString& userDefinedCharactersCont )
225 : throw(RuntimeException, std::exception)
226 : {
227 36168 : ParseResult r;
228 36168 : if ( Text.getLength() <= nPos )
229 3518 : return r;
230 :
231 : setupParserTable( rLocale,
232 : startCharTokenType, userDefinedCharactersStart,
233 32650 : contCharTokenType, userDefinedCharactersCont );
234 32650 : parseText( r, Text, nPos );
235 :
236 32650 : return r;
237 : }
238 :
239 :
240 95338 : ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
241 : sal_Int32 nTokenType,
242 : const OUString& Text,
243 : sal_Int32 nPos,
244 : const Locale& rLocale,
245 : sal_Int32 startCharTokenType,
246 : const OUString& userDefinedCharactersStart,
247 : sal_Int32 contCharTokenType,
248 : const OUString& userDefinedCharactersCont )
249 : throw(RuntimeException, std::exception)
250 : {
251 95338 : ParseResult r;
252 95338 : if ( Text.getLength() <= nPos )
253 3520 : return r;
254 :
255 : setupParserTable( rLocale,
256 : startCharTokenType, userDefinedCharactersStart,
257 91818 : contCharTokenType, userDefinedCharactersCont );
258 91818 : parseText( r, Text, nPos, nTokenType );
259 :
260 91818 : return r;
261 : }
262 :
263 0 : OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException, std::exception )
264 : {
265 0 : return OUString("com.sun.star.i18n.CharacterClassification_Unicode");
266 : }
267 :
268 0 : sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
269 : {
270 0 : return cppu::supportsService(this, rServiceName);
271 : }
272 :
273 0 : Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException, std::exception )
274 : {
275 0 : Sequence< OUString > aRet(1);
276 0 : aRet[0] = OUString("com.sun.star.i18n.CharacterClassification_Unicode");
277 0 : return aRet;
278 : }
279 :
280 : } } } }
281 :
282 : extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
283 60441 : com_sun_star_i18n_CharacterClassification_Unicode_get_implementation(
284 : css::uno::XComponentContext *context,
285 : css::uno::Sequence<css::uno::Any> const &)
286 : {
287 60441 : return cppu::acquire(new css::i18n::cclass_Unicode(context));
288 : }
289 :
290 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|