Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #include <cclass_unicode.hxx>
22 : #include <com/sun/star/i18n/UnicodeScript.hpp>
23 : #include <com/sun/star/i18n/UnicodeType.hpp>
24 : #include <com/sun/star/i18n/KCharacterType.hpp>
25 : #include <unicode/uchar.h>
26 : #include <comphelper/string.hxx>
27 : #include <breakiteratorImpl.hxx>
28 :
29 : using namespace ::com::sun::star::uno;
30 : using namespace ::com::sun::star::lang;
31 : using namespace ::rtl;
32 :
33 : namespace com { namespace sun { namespace star { namespace i18n {
34 : // ----------------------------------------------------
35 : // class cclass_Unicode
36 : // ----------------------------------------------------;
37 :
38 11690 : cclass_Unicode::cclass_Unicode( const uno::Reference < XComponentContext >& rxContext ) : m_xContext( rxContext ),
39 : pTable( NULL ),
40 : pStart( NULL ),
41 : pCont( NULL ),
42 : nStartTypes( 0 ),
43 : nContTypes( 0 ),
44 : eState( ssGetChar ),
45 : cGroupSep( ',' ),
46 11690 : cDecimalSep( '.' )
47 : {
48 11690 : trans = new Transliteration_casemapping();
49 11690 : cClass = "com.sun.star.i18n.CharacterClassification_Unicode";
50 11690 : }
51 :
52 34383 : cclass_Unicode::~cclass_Unicode() {
53 11461 : destroyParserTable();
54 11461 : delete trans;
55 22922 : }
56 :
57 :
58 : OUString SAL_CALL
59 181886 : cclass_Unicode::toUpper( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
60 181886 : sal_Int32 len = Text.getLength();
61 181886 : if (nPos >= len)
62 209 : return OUString();
63 181677 : if (nCount + nPos > len)
64 1 : nCount = len - nPos;
65 :
66 181677 : trans->setMappingType(MappingTypeToUpper, rLocale);
67 181677 : return trans->transliterateString2String(Text, nPos, nCount);
68 : }
69 :
70 : OUString SAL_CALL
71 19 : cclass_Unicode::toLower( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
72 19 : sal_Int32 len = Text.getLength();
73 19 : if (nPos >= len)
74 0 : return OUString();
75 19 : if (nCount + nPos > len)
76 0 : nCount = len - nPos;
77 :
78 19 : trans->setMappingType(MappingTypeToLower, rLocale);
79 19 : return trans->transliterateString2String(Text, nPos, nCount);
80 : }
81 :
82 : OUString SAL_CALL
83 2 : cclass_Unicode::toTitle( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& rLocale ) throw(RuntimeException) {
84 2 : sal_Int32 len = Text.getLength();
85 2 : if (nPos >= len)
86 0 : return OUString();
87 2 : if (nCount + nPos > len)
88 0 : nCount = len - nPos;
89 :
90 2 : trans->setMappingType(MappingTypeToTitle, rLocale);
91 2 : rtl_uString* pStr = rtl_uString_alloc(nCount);
92 2 : sal_Unicode* out = pStr->buffer;
93 2 : BreakIteratorImpl brk(m_xContext);
94 : Boundary bdy = brk.getWordBoundary(Text, nPos, rLocale,
95 2 : WordType::ANYWORD_IGNOREWHITESPACES, sal_True);
96 12 : for (sal_Int32 i = nPos; i < nCount + nPos; i++, out++) {
97 10 : if (i >= bdy.endPos)
98 : bdy = brk.nextWord(Text, bdy.endPos, rLocale,
99 1 : WordType::ANYWORD_IGNOREWHITESPACES);
100 : *out = (i == bdy.startPos) ?
101 10 : trans->transliterateChar2Char(Text[i]) : Text[i];
102 : }
103 2 : *out = 0;
104 2 : return OUString( pStr, SAL_NO_ACQUIRE );
105 : }
106 :
107 : sal_Int16 SAL_CALL
108 16987 : cclass_Unicode::getType( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
109 16987 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
110 16010 : return (sal_Int16) u_charType(Text.iterateCodePoints(&nPos, 0));
111 : }
112 :
113 : sal_Int16 SAL_CALL
114 94 : cclass_Unicode::getCharacterDirection( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
115 94 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
116 94 : return (sal_Int16) u_charDirection(Text.iterateCodePoints(&nPos, 0));
117 : }
118 :
119 :
120 : sal_Int16 SAL_CALL
121 0 : cclass_Unicode::getScript( const OUString& Text, sal_Int32 nPos ) throw(RuntimeException) {
122 0 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
123 : // ICU Unicode script type UBlockCode starts from 1 for Basci Latin,
124 : // while OO.o enum UnicideScript starts from 0.
125 : // To map ICU UBlockCode to OO.o UnicodeScript, it needs to shift 1.
126 0 : return (sal_Int16) ublock_getCode(Text.iterateCodePoints(&nPos, 0))-1;
127 : }
128 :
129 :
130 : sal_Int32 SAL_CALL
131 2656 : cclass_Unicode::getCharType( const OUString& Text, sal_Int32* nPos, sal_Int32 increment) {
132 : using namespace ::com::sun::star::i18n::KCharacterType;
133 :
134 2656 : sal_uInt32 ch = Text.iterateCodePoints(nPos, increment);
135 2656 : if (increment > 0) ch = Text.iterateCodePoints(nPos, 0);
136 2656 : switch ( u_charType(ch) ) {
137 : // Upper
138 : case U_UPPERCASE_LETTER :
139 152 : return UPPER|LETTER|PRINTABLE|BASE_FORM;
140 :
141 : // Lower
142 : case U_LOWERCASE_LETTER :
143 313 : return LOWER|LETTER|PRINTABLE|BASE_FORM;
144 :
145 : // Title
146 : case U_TITLECASE_LETTER :
147 0 : return TITLE_CASE|LETTER|PRINTABLE|BASE_FORM;
148 :
149 : // Letter
150 : case U_MODIFIER_LETTER :
151 : case U_OTHER_LETTER :
152 8 : return LETTER|PRINTABLE|BASE_FORM;
153 :
154 : // Digit
155 : case U_DECIMAL_DIGIT_NUMBER:
156 : case U_LETTER_NUMBER:
157 : case U_OTHER_NUMBER:
158 123 : return DIGIT|PRINTABLE|BASE_FORM;
159 :
160 : // Base
161 : case U_NON_SPACING_MARK:
162 : case U_ENCLOSING_MARK:
163 : case U_COMBINING_SPACING_MARK:
164 0 : return BASE_FORM|PRINTABLE;
165 :
166 : // Print
167 : case U_SPACE_SEPARATOR:
168 :
169 : case U_DASH_PUNCTUATION:
170 : case U_INITIAL_PUNCTUATION:
171 : case U_FINAL_PUNCTUATION:
172 : case U_CONNECTOR_PUNCTUATION:
173 : case U_OTHER_PUNCTUATION:
174 :
175 : case U_MATH_SYMBOL:
176 : case U_CURRENCY_SYMBOL:
177 : case U_MODIFIER_SYMBOL:
178 : case U_OTHER_SYMBOL:
179 1958 : return PRINTABLE;
180 :
181 : // Control
182 : case U_CONTROL_CHAR:
183 : case U_FORMAT_CHAR:
184 101 : return CONTROL;
185 :
186 : case U_LINE_SEPARATOR:
187 : case U_PARAGRAPH_SEPARATOR:
188 0 : return CONTROL|PRINTABLE;
189 :
190 : // for all others
191 : default:
192 1 : return U_GENERAL_OTHER_TYPES;
193 : }
194 : }
195 :
196 : sal_Int32 SAL_CALL
197 53 : cclass_Unicode::getCharacterType( const OUString& Text, sal_Int32 nPos, const Locale& /*rLocale*/ ) throw(RuntimeException) {
198 53 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
199 53 : return getCharType(Text, &nPos, 0);
200 :
201 : }
202 :
203 : sal_Int32 SAL_CALL
204 2603 : cclass_Unicode::getStringType( const OUString& Text, sal_Int32 nPos, sal_Int32 nCount, const Locale& /*rLocale*/ ) throw(RuntimeException) {
205 2603 : if ( nPos < 0 || Text.getLength() <= nPos ) return 0;
206 :
207 2603 : sal_Int32 result = getCharType(Text, &nPos, 0);
208 2603 : for (sal_Int32 i = 1; i < nCount && nPos < Text.getLength(); i++)
209 0 : result |= getCharType(Text, &nPos, 1);
210 2603 : return result;
211 : }
212 :
213 11515 : ParseResult SAL_CALL cclass_Unicode::parseAnyToken(
214 : const OUString& Text,
215 : sal_Int32 nPos,
216 : const Locale& rLocale,
217 : sal_Int32 startCharTokenType,
218 : const OUString& userDefinedCharactersStart,
219 : sal_Int32 contCharTokenType,
220 : const OUString& userDefinedCharactersCont )
221 : throw(RuntimeException)
222 : {
223 11515 : ParseResult r;
224 11515 : if ( Text.getLength() <= nPos )
225 977 : return r;
226 :
227 : setupParserTable( rLocale,
228 : startCharTokenType, userDefinedCharactersStart,
229 10538 : contCharTokenType, userDefinedCharactersCont );
230 10538 : parseText( r, Text, nPos );
231 :
232 10538 : return r;
233 : }
234 :
235 :
236 911 : ParseResult SAL_CALL cclass_Unicode::parsePredefinedToken(
237 : sal_Int32 nTokenType,
238 : const OUString& Text,
239 : sal_Int32 nPos,
240 : const Locale& rLocale,
241 : sal_Int32 startCharTokenType,
242 : const OUString& userDefinedCharactersStart,
243 : sal_Int32 contCharTokenType,
244 : const OUString& userDefinedCharactersCont )
245 : throw(RuntimeException)
246 : {
247 911 : ParseResult r;
248 911 : if ( Text.getLength() <= nPos )
249 0 : return r;
250 :
251 : setupParserTable( rLocale,
252 : startCharTokenType, userDefinedCharactersStart,
253 911 : contCharTokenType, userDefinedCharactersCont );
254 911 : parseText( r, Text, nPos, nTokenType );
255 :
256 911 : return r;
257 : }
258 :
259 0 : OUString SAL_CALL cclass_Unicode::getImplementationName() throw( RuntimeException )
260 : {
261 0 : return OUString::createFromAscii(cClass);
262 : }
263 :
264 :
265 0 : sal_Bool SAL_CALL cclass_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException )
266 : {
267 0 : return !rServiceName.compareToAscii(cClass);
268 : }
269 :
270 0 : Sequence< OUString > SAL_CALL cclass_Unicode::getSupportedServiceNames() throw( RuntimeException )
271 : {
272 0 : Sequence< OUString > aRet(1);
273 0 : aRet[0] = OUString::createFromAscii(cClass);
274 0 : return aRet;
275 : }
276 :
277 : } } } }
278 :
279 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|