Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <config_locales.h>
21 :
22 : #include "lrl_include.hxx"
23 :
24 : #include <rtl/ustrbuf.hxx>
25 : #include <i18nlangtag/languagetag.hxx>
26 : #include <i18nlangtag/languagetagicu.hxx>
27 : #include <collator_unicode.hxx>
28 : #include <localedata.hxx>
29 : #include <com/sun/star/i18n/CollatorOptions.hpp>
30 : #include <cppuhelper/supportsservice.hxx>
31 :
32 : using namespace ::com::sun::star;
33 : using namespace ::com::sun::star::lang;
34 : using namespace ::com::sun::star::uno;
35 :
36 : namespace com { namespace sun { namespace star { namespace i18n {
37 :
38 157 : Collator_Unicode::Collator_Unicode()
39 : {
40 157 : implementationName = "com.sun.star.i18n.Collator_Unicode";
41 157 : collator = NULL;
42 157 : uca_base = NULL;
43 : #ifndef DISABLE_DYNLOADING
44 157 : hModule = NULL;
45 : #endif
46 157 : }
47 :
48 357 : Collator_Unicode::~Collator_Unicode()
49 : {
50 119 : if (collator) delete collator;
51 119 : if (uca_base) delete uca_base;
52 : #ifndef DISABLE_DYNLOADING
53 119 : if (hModule) osl_unloadModule(hModule);
54 : #endif
55 238 : }
56 :
#ifdef DISABLE_DYNLOADING

extern "C" {

// Declarations of the collator data accessors that loadCollatorAlgorithm()
// references directly when dynamic loading is disabled.
//
// For DISABLE_DYNLOADING the generated functions have names that
// start with get_collator_data_ to avoid clashing with a few
// functions in the generated libindex_data that are called just
// get_zh_pinyin for instance.

// Accessors returning a pointer to the collation rule image of each
// language/algorithm combination.
const sal_uInt8* get_collator_data_ca_charset();
const sal_uInt8* get_collator_data_dz_charset();
const sal_uInt8* get_collator_data_hu_charset();
const sal_uInt8* get_collator_data_ja_charset();
const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_first();
const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_last();
const sal_uInt8* get_collator_data_ko_charset();
const sal_uInt8* get_collator_data_ku_alphanumeric();
const sal_uInt8* get_collator_data_ln_charset();
const sal_uInt8* get_collator_data_my_dictionary();
const sal_uInt8* get_collator_data_ne_charset();
const sal_uInt8* get_collator_data_sid_charset();
const sal_uInt8* get_collator_data_zh_TW_charset();
const sal_uInt8* get_collator_data_zh_TW_radical();
const sal_uInt8* get_collator_data_zh_TW_stroke();
const sal_uInt8* get_collator_data_zh_charset();
const sal_uInt8* get_collator_data_zh_pinyin();
const sal_uInt8* get_collator_data_zh_radical();
const sal_uInt8* get_collator_data_zh_stroke();
const sal_uInt8* get_collator_data_zh_zhuyin();

// Matching accessors returning the size of the corresponding rule image;
// each is named after its data accessor with a "_length" suffix.
size_t get_collator_data_ca_charset_length();
size_t get_collator_data_dz_charset_length();
size_t get_collator_data_hu_charset_length();
size_t get_collator_data_ja_charset_length();
size_t get_collator_data_ja_phonetic_alphanumeric_first_length();
size_t get_collator_data_ja_phonetic_alphanumeric_last_length();
size_t get_collator_data_ko_charset_length();
size_t get_collator_data_ku_alphanumeric_length();
size_t get_collator_data_ln_charset_length();
size_t get_collator_data_my_dictionary_length();
size_t get_collator_data_ne_charset_length();
size_t get_collator_data_sid_charset_length();
size_t get_collator_data_zh_TW_charset_length();
size_t get_collator_data_zh_TW_radical_length();
size_t get_collator_data_zh_TW_stroke_length();
size_t get_collator_data_zh_charset_length();
size_t get_collator_data_zh_pinyin_length();
size_t get_collator_data_zh_radical_length();
size_t get_collator_data_zh_stroke_length();
size_t get_collator_data_zh_zhuyin_length();

}

#endif
111 :
112 : sal_Int32 SAL_CALL
113 1593 : Collator_Unicode::compareSubstring( const OUString& str1, sal_Int32 off1, sal_Int32 len1,
114 : const OUString& str2, sal_Int32 off2, sal_Int32 len2) throw(RuntimeException, std::exception)
115 : {
116 1593 : return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()) + off1, len1, reinterpret_cast<const UChar *>(str2.getStr()) + off2, len2); // UChar != sal_Unicode in MinGW
117 : }
118 :
119 : sal_Int32 SAL_CALL
120 85318 : Collator_Unicode::compareString( const OUString& str1, const OUString& str2) throw(RuntimeException, std::exception)
121 : {
122 85318 : return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()), reinterpret_cast<const UChar *>(str2.getStr())); // UChar != sal_Unicode in MinGW
123 : }
124 :
125 : #ifndef DISABLE_DYNLOADING
126 :
127 0 : extern "C" { static void SAL_CALL thisModule() {} }
128 :
129 : #endif
130 :
131 : sal_Int32 SAL_CALL
132 157 : Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::Locale& rLocale, sal_Int32 options)
133 : throw(RuntimeException, std::exception)
134 : {
135 157 : if (!collator) {
136 157 : UErrorCode status = U_ZERO_ERROR;
137 157 : OUString rule = LocaleDataImpl().getCollatorRuleByAlgorithm(rLocale, rAlgorithm);
138 157 : if (!rule.isEmpty()) {
139 0 : collator = new RuleBasedCollator(reinterpret_cast<const UChar *>(rule.getStr()), status); // UChar != sal_Unicode in MinGW
140 0 : if (! U_SUCCESS(status)) throw RuntimeException();
141 : }
142 157 : if (!collator && OUString::createFromAscii(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
143 0 : const sal_uInt8* (*func)() = NULL;
144 0 : size_t (*funclen)() = NULL;
145 :
146 : #ifndef DISABLE_DYNLOADING
147 0 : OUStringBuffer aBuf;
148 : #ifdef SAL_DLLPREFIX
149 0 : aBuf.appendAscii(SAL_DLLPREFIX);
150 : #endif
151 0 : aBuf.appendAscii( "collator_data" ).appendAscii( SAL_DLLEXTENSION );
152 0 : hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
153 0 : if (hModule) {
154 0 : aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_");
155 0 : if ( rLocale.Language == "zh" ) {
156 0 : OUString func_base = aBuf.makeStringAndClear();
157 0 : OUString funclen_base = func_base + "_length";
158 0 : if (OUString("TW HK MO").indexOf(rLocale.Country) >= 0)
159 : {
160 : func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule,
161 0 : OUString(func_base + "TW_" + rAlgorithm).pData);
162 : funclen = (size_t (*)()) osl_getFunctionSymbol(hModule,
163 0 : OUString(funclen_base + "TW_" + rAlgorithm).pData);
164 : }
165 0 : if (!func)
166 : {
167 : func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(
168 0 : hModule, OUString(func_base + rAlgorithm).pData);
169 : funclen = (size_t (*)()) osl_getFunctionSymbol(
170 0 : hModule, OUString(funclen_base + rAlgorithm).pData);
171 0 : }
172 : } else {
173 0 : if ( rLocale.Language == "ja" ) {
174 : // replace algorithm name to implementation name.
175 0 : if (rAlgorithm == "phonetic (alphanumeric first)")
176 0 : aBuf.appendAscii("phonetic_alphanumeric_first");
177 0 : else if (rAlgorithm == "phonetic (alphanumeric last)")
178 0 : aBuf.appendAscii("phonetic_alphanumeric_last");
179 : else
180 0 : aBuf.append(rAlgorithm);
181 : } else {
182 0 : aBuf.append(rAlgorithm);
183 : }
184 0 : OUString func_base = aBuf.makeStringAndClear();
185 0 : OUString funclen_base = func_base + "_length";
186 0 : func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, func_base.pData);
187 0 : funclen = (size_t (*)()) osl_getFunctionSymbol(hModule, funclen_base.pData);
188 : }
189 : }
190 : #else
191 : if (false) {
192 : ;
193 : #if WITH_LOCALE_ALL || WITH_LOCALE_ca
194 : } else if ( rLocale.Language == "ca" ) {
195 : if ( rAlgorithm == "charset" )
196 : {
197 : func = get_collator_data_ca_charset;
198 : funclen = get_collator_data_ca_charset_length;
199 : }
200 : #endif
201 : #if WITH_LOCALE_ALL || WITH_LOCALE_dz
202 : } else if ( rLocale.Language == "dz" || rLocale.Language == "bo" ) {
203 : // 'bo' Tibetan uses the same collation rules as 'dz' Dzongkha
204 : if ( rAlgorithm == "charset" )
205 : {
206 : func = get_collator_data_dz_charset;
207 : funclen = get_collator_data_dz_charset_length;
208 : }
209 : #endif
210 : #if WITH_LOCALE_ALL || WITH_LOCALE_hu
211 : } else if ( rLocale.Language == "hu" ) {
212 : if ( rAlgorithm == "charset" )
213 : {
214 : func = get_collator_data_hu_charset;
215 : funclen = get_collator_data_hu_charset_length;
216 : }
217 : #endif
218 : #if WITH_LOCALE_ALL || WITH_LOCALE_ja
219 : } else if ( rLocale.Language == "ja" ) {
220 : if ( rAlgorithm == "charset" )
221 : {
222 : func = get_collator_data_ja_charset;
223 : funclen = get_collator_data_ja_charset_length;
224 : }
225 : else if ( rAlgorithm == "phonetic (alphanumeric first)" )
226 : {
227 : func = get_collator_data_ja_phonetic_alphanumeric_first;
228 : funclen = get_collator_data_ja_phonetic_alphanumeric_first_length;
229 : }
230 : else if ( rAlgorithm == "phonetic (alphanumeric last)" )
231 : {
232 : func = get_collator_data_ja_phonetic_alphanumeric_last;
233 : funclen = get_collator_data_ja_phonetic_alphanumeric_last_length;
234 : }
235 : #endif
236 : #if WITH_LOCALE_ALL || WITH_LOCALE_ko
237 : #if (U_ICU_VERSION_MAJOR_NUM < 53)
238 : } else if ( rLocale.Language == "ko" ) {
239 : if ( rAlgorithm == "charset" )
240 : {
241 : func = get_collator_data_ko_charset;
242 : funclen = get_collator_data_ko_charset_length;
243 : }
244 : #endif
245 : #endif
246 : #if WITH_LOCALE_ALL || WITH_LOCALE_ku
247 : } else if ( rLocale.Language == "ku" ) {
248 : if ( rAlgorithm == "alphanumeric" )
249 : {
250 : func = get_collator_data_ku_alphanumeric;
251 : funclen = get_collator_data_ku_alphanumeric_length;
252 : }
253 : #endif
254 : #if WITH_LOCALE_ALL || WITH_LOCALE_ln
255 : } else if ( rLocale.Language == "ln" ) {
256 : if ( rAlgorithm == "charset" )
257 : {
258 : func = get_collator_data_ln_charset;
259 : funclen = get_collator_data_ln_charset_length;
260 : }
261 : #endif
262 : #if WITH_LOCALE_ALL || WITH_LOCALE_my
263 : } else if ( rLocale.Language == "my" ) {
264 : if ( rAlgorithm == "dictionary" )
265 : {
266 : func = get_collator_data_my_dictionary;
267 : funclen = get_collator_data_my_dictionary_length;
268 : }
269 : #endif
270 : #if WITH_LOCALE_ALL || WITH_LOCALE_ne
271 : } else if ( rLocale.Language == "ne" ) {
272 : if ( rAlgorithm == "charset" )
273 : {
274 : func = get_collator_data_ne_charset;
275 : funclen = get_collator_data_ne_charset_length;
276 : }
277 : #endif
278 : #if WITH_LOCALE_ALL || WITH_LOCALE_sid
279 : } else if ( rLocale.Language == "sid" ) {
280 : if ( rAlgorithm == "charset" )
281 : {
282 : func = get_collator_data_sid_charset;
283 : funclen = get_collator_data_sid_charset_length;
284 : }
285 : #endif
286 : #if WITH_LOCALE_ALL || WITH_LOCALE_zh
287 : } else if ( rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO") ) {
288 : if ( rAlgorithm == "charset" )
289 : {
290 : func = get_collator_data_zh_TW_charset;
291 : funclen = get_collator_data_zh_TW_charset_length;
292 : }
293 : else if ( rAlgorithm == "radical" )
294 : {
295 : func = get_collator_data_zh_TW_radical;
296 : funclen = get_collator_data_zh_TW_radical_length;
297 : }
298 : else if ( rAlgorithm == "stroke" )
299 : {
300 : func = get_collator_data_zh_TW_stroke;
301 : funclen = get_collator_data_zh_TW_stroke_length;
302 : }
303 : } else if ( rLocale.Language == "zh" ) {
304 : if ( rAlgorithm == "charset" )
305 : {
306 : func = get_collator_data_zh_charset;
307 : funclen = get_collator_data_zh_charset_length;
308 : }
309 : else if ( rAlgorithm == "pinyin" )
310 : {
311 : func = get_collator_data_zh_pinyin;
312 : funclen = get_collator_data_zh_pinyin_length;
313 : }
314 : else if ( rAlgorithm == "radical" )
315 : {
316 : func = get_collator_data_zh_radical;
317 : funclen = get_collator_data_zh_radical_length;
318 : }
319 : else if ( rAlgorithm == "stroke" )
320 : {
321 : func = get_collator_data_zh_stroke;
322 : funclen = get_collator_data_zh_stroke_length;
323 : }
324 : else if ( rAlgorithm == "zhuyin" )
325 : {
326 : func = get_collator_data_zh_zhuyin;
327 : funclen = get_collator_data_zh_zhuyin_length;
328 : }
329 : #endif
330 : }
331 : #endif // DISABLE_DYNLOADING
332 0 : if (func && funclen) {
333 0 : const sal_uInt8* ruleImage=func();
334 0 : size_t ruleImageSize = funclen();
335 :
336 : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
337 : uca_base = new RuleBasedCollator(static_cast<UChar*>(NULL), status);
338 : #else
339 : // Not only changed ICU 53.1 the API behavior that a negative
340 : // length (ruleImageSize) now leads to failure, but also that
341 : // the base RuleBasedCollator passed as uca_base here needs to
342 : // have a base->tailoring == CollationRoot::getRoot() otherwise
343 : // the init bails out as well, as it does for the previously
344 : // used "empty" RuleBasedCollator.
345 : // The default collator of the en-US locale would also fulfill
346 : // the requirement. The collator of the actual locale or the
347 : // NULL (default) locale does not.
348 : uca_base = static_cast<RuleBasedCollator*>(icu::Collator::createInstance(
349 0 : icu::Locale::getRoot(), status));
350 : #endif
351 0 : if (! U_SUCCESS(status)) throw RuntimeException();
352 : collator = new RuleBasedCollator(
353 0 : reinterpret_cast<const uint8_t*>(ruleImage), ruleImageSize, uca_base, status);
354 0 : if (! U_SUCCESS(status)) throw RuntimeException();
355 0 : }
356 : }
357 157 : if (!collator) {
358 : /** ICU collators are loaded using a locale only.
359 : ICU uses Variant as collation algorithm name (like de__PHONEBOOK
360 : locale), note the empty territory (Country) designator in this special
361 : case here. The icu::Locale constructor changes the algorithm name to
362 : uppercase itself, so we don't have to bother with that.
363 : */
364 157 : icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale), rAlgorithm));
365 : // load ICU collator
366 157 : collator = static_cast<RuleBasedCollator*>( icu::Collator::createInstance(icuLocale, status) );
367 157 : if (! U_SUCCESS(status)) throw RuntimeException();
368 157 : }
369 : }
370 :
371 157 : if (options & CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)
372 10 : collator->setStrength(Collator::PRIMARY);
373 147 : else if (options & CollatorOptions::CollatorOptions_IGNORE_CASE)
374 42 : collator->setStrength(Collator::SECONDARY);
375 : else
376 105 : collator->setStrength(Collator::TERTIARY);
377 :
378 157 : return(0);
379 : }
380 :
381 :
382 : OUString SAL_CALL
383 0 : Collator_Unicode::getImplementationName() throw( RuntimeException, std::exception )
384 : {
385 0 : return OUString::createFromAscii(implementationName);
386 : }
387 :
388 : sal_Bool SAL_CALL
389 0 : Collator_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
390 : {
391 0 : return cppu::supportsService(this, rServiceName);
392 : }
393 :
394 : Sequence< OUString > SAL_CALL
395 0 : Collator_Unicode::getSupportedServiceNames() throw( RuntimeException, std::exception )
396 : {
397 0 : Sequence< OUString > aRet(1);
398 0 : aRet[0] = OUString::createFromAscii(implementationName);
399 0 : return aRet;
400 : }
401 :
402 : } } } }
403 :
404 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|