Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <config_locales.h>
21 :
22 : #include "lrl_include.hxx"
23 :
24 : #include <rtl/ustrbuf.hxx>
25 : #include <i18nlangtag/languagetag.hxx>
26 : #include <i18nlangtag/languagetagicu.hxx>
27 : #include <collator_unicode.hxx>
28 : #include <localedata.hxx>
29 : #include <com/sun/star/i18n/CollatorOptions.hpp>
30 : #include <cppuhelper/supportsservice.hxx>
31 :
32 : using namespace ::com::sun::star;
33 : using namespace ::com::sun::star::lang;
34 : using namespace ::com::sun::star::uno;
35 :
36 : namespace com { namespace sun { namespace star { namespace i18n {
37 :
38 86 : Collator_Unicode::Collator_Unicode()
39 : {
40 86 : implementationName = "com.sun.star.i18n.Collator_Unicode";
41 86 : collator = NULL;
42 86 : uca_base = NULL;
43 : #ifndef DISABLE_DYNLOADING
44 86 : hModule = NULL;
45 : #endif
46 86 : }
47 :
48 201 : Collator_Unicode::~Collator_Unicode()
49 : {
50 67 : if (collator) delete collator;
51 67 : if (uca_base) delete uca_base;
52 : #ifndef DISABLE_DYNLOADING
53 67 : if (hModule) osl_unloadModule(hModule);
54 : #endif
55 134 : }
56 :
#ifdef DISABLE_DYNLOADING

extern "C" {

// For DISABLE_DYNLOADING the generated functions have names that
// start with get_collator_data_ to avoid clashing with a few
// functions in the generated libindex_data that are called just
// get_zh_pinyin for instance.
//
// Each rule image accessor is declared together with the accessor
// that returns the size of that image.

const sal_uInt8* get_collator_data_ca_charset();
size_t get_collator_data_ca_charset_length();

const sal_uInt8* get_collator_data_cu_charset();
size_t get_collator_data_cu_charset_length();

const sal_uInt8* get_collator_data_dz_charset();
size_t get_collator_data_dz_charset_length();

const sal_uInt8* get_collator_data_hu_charset();
size_t get_collator_data_hu_charset_length();

const sal_uInt8* get_collator_data_ja_charset();
size_t get_collator_data_ja_charset_length();

const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_first();
size_t get_collator_data_ja_phonetic_alphanumeric_first_length();

const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_last();
size_t get_collator_data_ja_phonetic_alphanumeric_last_length();

const sal_uInt8* get_collator_data_ko_charset();
size_t get_collator_data_ko_charset_length();

const sal_uInt8* get_collator_data_ku_alphanumeric();
size_t get_collator_data_ku_alphanumeric_length();

const sal_uInt8* get_collator_data_ln_charset();
size_t get_collator_data_ln_charset_length();

const sal_uInt8* get_collator_data_my_dictionary();
size_t get_collator_data_my_dictionary_length();

const sal_uInt8* get_collator_data_ne_charset();
size_t get_collator_data_ne_charset_length();

const sal_uInt8* get_collator_data_sid_charset();
size_t get_collator_data_sid_charset_length();

const sal_uInt8* get_collator_data_zh_TW_charset();
size_t get_collator_data_zh_TW_charset_length();

const sal_uInt8* get_collator_data_zh_TW_radical();
size_t get_collator_data_zh_TW_radical_length();

const sal_uInt8* get_collator_data_zh_TW_stroke();
size_t get_collator_data_zh_TW_stroke_length();

const sal_uInt8* get_collator_data_zh_charset();
size_t get_collator_data_zh_charset_length();

const sal_uInt8* get_collator_data_zh_pinyin();
size_t get_collator_data_zh_pinyin_length();

const sal_uInt8* get_collator_data_zh_radical();
size_t get_collator_data_zh_radical_length();

const sal_uInt8* get_collator_data_zh_stroke();
size_t get_collator_data_zh_stroke_length();

const sal_uInt8* get_collator_data_zh_zhuyin();
size_t get_collator_data_zh_zhuyin_length();

}

#endif
113 :
114 : sal_Int32 SAL_CALL
115 462 : Collator_Unicode::compareSubstring( const OUString& str1, sal_Int32 off1, sal_Int32 len1,
116 : const OUString& str2, sal_Int32 off2, sal_Int32 len2) throw(RuntimeException, std::exception)
117 : {
118 462 : return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()) + off1, len1, reinterpret_cast<const UChar *>(str2.getStr()) + off2, len2); // UChar != sal_Unicode in MinGW
119 : }
120 :
121 : sal_Int32 SAL_CALL
122 42784 : Collator_Unicode::compareString( const OUString& str1, const OUString& str2) throw(RuntimeException, std::exception)
123 : {
124 42784 : return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()), reinterpret_cast<const UChar *>(str2.getStr())); // UChar != sal_Unicode in MinGW
125 : }
126 :
127 : #ifndef DISABLE_DYNLOADING
128 :
129 0 : extern "C" { static void SAL_CALL thisModule() {} }
130 :
131 : #endif
132 :
133 : sal_Int32 SAL_CALL
134 86 : Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::Locale& rLocale, sal_Int32 options)
135 : throw(RuntimeException, std::exception)
136 : {
137 86 : if (!collator) {
138 86 : UErrorCode status = U_ZERO_ERROR;
139 86 : OUString rule = LocaleDataImpl().getCollatorRuleByAlgorithm(rLocale, rAlgorithm);
140 86 : if (!rule.isEmpty()) {
141 0 : collator = new RuleBasedCollator(reinterpret_cast<const UChar *>(rule.getStr()), status); // UChar != sal_Unicode in MinGW
142 0 : if (! U_SUCCESS(status)) throw RuntimeException();
143 : }
144 86 : if (!collator && OUString(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
145 0 : const sal_uInt8* (*func)() = NULL;
146 0 : size_t (*funclen)() = NULL;
147 :
148 : #ifndef DISABLE_DYNLOADING
149 0 : OUStringBuffer aBuf;
150 : #ifdef SAL_DLLPREFIX
151 0 : aBuf.appendAscii(SAL_DLLPREFIX);
152 : #endif
153 0 : aBuf.appendAscii( "collator_data" ).appendAscii( SAL_DLLEXTENSION );
154 0 : hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
155 0 : if (hModule) {
156 0 : aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_");
157 0 : if ( rLocale.Language == "zh" ) {
158 0 : OUString func_base = aBuf.makeStringAndClear();
159 0 : OUString funclen_base = func_base + "_length";
160 0 : if (OUString("TW HK MO").indexOf(rLocale.Country) >= 0)
161 : {
162 : func = reinterpret_cast<const sal_uInt8* (*)()>(osl_getFunctionSymbol(hModule,
163 0 : OUString(func_base + "TW_" + rAlgorithm).pData));
164 : funclen = reinterpret_cast<size_t (*)()>(osl_getFunctionSymbol(hModule,
165 0 : OUString(funclen_base + "TW_" + rAlgorithm).pData));
166 : }
167 0 : if (!func)
168 : {
169 : func = reinterpret_cast<const sal_uInt8* (*)()>(osl_getFunctionSymbol(
170 0 : hModule, OUString(func_base + rAlgorithm).pData));
171 : funclen = reinterpret_cast<size_t (*)()>(osl_getFunctionSymbol(
172 0 : hModule, OUString(funclen_base + rAlgorithm).pData));
173 0 : }
174 : } else {
175 0 : if ( rLocale.Language == "ja" ) {
176 : // replace algorithm name to implementation name.
177 0 : if (rAlgorithm == "phonetic (alphanumeric first)")
178 0 : aBuf.appendAscii("phonetic_alphanumeric_first");
179 0 : else if (rAlgorithm == "phonetic (alphanumeric last)")
180 0 : aBuf.appendAscii("phonetic_alphanumeric_last");
181 : else
182 0 : aBuf.append(rAlgorithm);
183 : } else {
184 0 : aBuf.append(rAlgorithm);
185 : }
186 0 : OUString func_base = aBuf.makeStringAndClear();
187 0 : OUString funclen_base = func_base + "_length";
188 0 : func = reinterpret_cast<const sal_uInt8* (*)()>(osl_getFunctionSymbol(hModule, func_base.pData));
189 0 : funclen = reinterpret_cast<size_t (*)()>(osl_getFunctionSymbol(hModule, funclen_base.pData));
190 : }
191 : }
192 : #else
193 : if (false) {
194 : ;
195 : #if WITH_LOCALE_ALL || WITH_LOCALE_ca
196 : } else if ( rLocale.Language == "ca" ) {
197 : if ( rAlgorithm == "charset" )
198 : {
199 : func = get_collator_data_ca_charset;
200 : funclen = get_collator_data_ca_charset_length;
201 : }
202 : #endif
203 : #if WITH_LOCALE_ALL || WITH_LOCALE_cu
204 : } else if ( rLocale.Language == "cu" ) {
205 : if ( rAlgorithm == "charset" )
206 : {
207 : func = get_collator_data_cu_charset;
208 : funclen = get_collator_data_cu_charset_length;
209 : }
210 : #endif
211 : #if WITH_LOCALE_ALL || WITH_LOCALE_dz
212 : } else if ( rLocale.Language == "dz" || rLocale.Language == "bo" ) {
213 : // 'bo' Tibetan uses the same collation rules as 'dz' Dzongkha
214 : if ( rAlgorithm == "charset" )
215 : {
216 : func = get_collator_data_dz_charset;
217 : funclen = get_collator_data_dz_charset_length;
218 : }
219 : #endif
220 : #if WITH_LOCALE_ALL || WITH_LOCALE_hu
221 : } else if ( rLocale.Language == "hu" ) {
222 : if ( rAlgorithm == "charset" )
223 : {
224 : func = get_collator_data_hu_charset;
225 : funclen = get_collator_data_hu_charset_length;
226 : }
227 : #endif
228 : #if WITH_LOCALE_ALL || WITH_LOCALE_ja
229 : } else if ( rLocale.Language == "ja" ) {
230 : if ( rAlgorithm == "charset" )
231 : {
232 : func = get_collator_data_ja_charset;
233 : funclen = get_collator_data_ja_charset_length;
234 : }
235 : else if ( rAlgorithm == "phonetic (alphanumeric first)" )
236 : {
237 : func = get_collator_data_ja_phonetic_alphanumeric_first;
238 : funclen = get_collator_data_ja_phonetic_alphanumeric_first_length;
239 : }
240 : else if ( rAlgorithm == "phonetic (alphanumeric last)" )
241 : {
242 : func = get_collator_data_ja_phonetic_alphanumeric_last;
243 : funclen = get_collator_data_ja_phonetic_alphanumeric_last_length;
244 : }
245 : #endif
246 : #if WITH_LOCALE_ALL || WITH_LOCALE_ko
247 : #if (U_ICU_VERSION_MAJOR_NUM < 53)
248 : } else if ( rLocale.Language == "ko" ) {
249 : if ( rAlgorithm == "charset" )
250 : {
251 : func = get_collator_data_ko_charset;
252 : funclen = get_collator_data_ko_charset_length;
253 : }
254 : #endif
255 : #endif
256 : #if WITH_LOCALE_ALL || WITH_LOCALE_ku
257 : } else if ( rLocale.Language == "ku" ) {
258 : if ( rAlgorithm == "alphanumeric" )
259 : {
260 : func = get_collator_data_ku_alphanumeric;
261 : funclen = get_collator_data_ku_alphanumeric_length;
262 : }
263 : #endif
264 : #if WITH_LOCALE_ALL || WITH_LOCALE_ln
265 : } else if ( rLocale.Language == "ln" ) {
266 : if ( rAlgorithm == "charset" )
267 : {
268 : func = get_collator_data_ln_charset;
269 : funclen = get_collator_data_ln_charset_length;
270 : }
271 : #endif
272 : #if WITH_LOCALE_ALL || WITH_LOCALE_my
273 : } else if ( rLocale.Language == "my" ) {
274 : if ( rAlgorithm == "dictionary" )
275 : {
276 : func = get_collator_data_my_dictionary;
277 : funclen = get_collator_data_my_dictionary_length;
278 : }
279 : #endif
280 : #if WITH_LOCALE_ALL || WITH_LOCALE_ne
281 : } else if ( rLocale.Language == "ne" ) {
282 : if ( rAlgorithm == "charset" )
283 : {
284 : func = get_collator_data_ne_charset;
285 : funclen = get_collator_data_ne_charset_length;
286 : }
287 : #endif
288 : #if WITH_LOCALE_ALL || WITH_LOCALE_sid
289 : } else if ( rLocale.Language == "sid" ) {
290 : if ( rAlgorithm == "charset" )
291 : {
292 : func = get_collator_data_sid_charset;
293 : funclen = get_collator_data_sid_charset_length;
294 : }
295 : #endif
296 : #if WITH_LOCALE_ALL || WITH_LOCALE_zh
297 : } else if ( rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO") ) {
298 : if ( rAlgorithm == "charset" )
299 : {
300 : func = get_collator_data_zh_TW_charset;
301 : funclen = get_collator_data_zh_TW_charset_length;
302 : }
303 : else if ( rAlgorithm == "radical" )
304 : {
305 : func = get_collator_data_zh_TW_radical;
306 : funclen = get_collator_data_zh_TW_radical_length;
307 : }
308 : else if ( rAlgorithm == "stroke" )
309 : {
310 : func = get_collator_data_zh_TW_stroke;
311 : funclen = get_collator_data_zh_TW_stroke_length;
312 : }
313 : } else if ( rLocale.Language == "zh" ) {
314 : if ( rAlgorithm == "charset" )
315 : {
316 : func = get_collator_data_zh_charset;
317 : funclen = get_collator_data_zh_charset_length;
318 : }
319 : else if ( rAlgorithm == "pinyin" )
320 : {
321 : func = get_collator_data_zh_pinyin;
322 : funclen = get_collator_data_zh_pinyin_length;
323 : }
324 : else if ( rAlgorithm == "radical" )
325 : {
326 : func = get_collator_data_zh_radical;
327 : funclen = get_collator_data_zh_radical_length;
328 : }
329 : else if ( rAlgorithm == "stroke" )
330 : {
331 : func = get_collator_data_zh_stroke;
332 : funclen = get_collator_data_zh_stroke_length;
333 : }
334 : else if ( rAlgorithm == "zhuyin" )
335 : {
336 : func = get_collator_data_zh_zhuyin;
337 : funclen = get_collator_data_zh_zhuyin_length;
338 : }
339 : #endif
340 : }
341 : #endif // DISABLE_DYNLOADING
342 0 : if (func && funclen) {
343 0 : const sal_uInt8* ruleImage=func();
344 0 : size_t ruleImageSize = funclen();
345 :
346 : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
347 : uca_base = new RuleBasedCollator(static_cast<UChar*>(NULL), status);
348 : #else
349 : // Not only changed ICU 53.1 the API behavior that a negative
350 : // length (ruleImageSize) now leads to failure, but also that
351 : // the base RuleBasedCollator passed as uca_base here needs to
352 : // have a base->tailoring == CollationRoot::getRoot() otherwise
353 : // the init bails out as well, as it does for the previously
354 : // used "empty" RuleBasedCollator.
355 : // The default collator of the en-US locale would also fulfill
356 : // the requirement. The collator of the actual locale or the
357 : // NULL (default) locale does not.
358 : uca_base = static_cast<RuleBasedCollator*>(icu::Collator::createInstance(
359 0 : icu::Locale::getRoot(), status));
360 : #endif
361 0 : if (! U_SUCCESS(status)) throw RuntimeException();
362 : collator = new RuleBasedCollator(
363 0 : reinterpret_cast<const uint8_t*>(ruleImage), ruleImageSize, uca_base, status);
364 0 : if (! U_SUCCESS(status)) throw RuntimeException();
365 0 : }
366 : }
367 86 : if (!collator) {
368 : /** ICU collators are loaded using a locale only.
369 : ICU uses Variant as collation algorithm name (like de__PHONEBOOK
370 : locale), note the empty territory (Country) designator in this special
371 : case here. The icu::Locale constructor changes the algorithm name to
372 : uppercase itself, so we don't have to bother with that.
373 : */
374 86 : icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale), rAlgorithm));
375 : // load ICU collator
376 86 : collator = static_cast<RuleBasedCollator*>( icu::Collator::createInstance(icuLocale, status) );
377 86 : if (! U_SUCCESS(status)) throw RuntimeException();
378 86 : }
379 : }
380 :
381 86 : if (options & CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)
382 5 : collator->setStrength(Collator::PRIMARY);
383 81 : else if (options & CollatorOptions::CollatorOptions_IGNORE_CASE)
384 22 : collator->setStrength(Collator::SECONDARY);
385 : else
386 59 : collator->setStrength(Collator::TERTIARY);
387 :
388 86 : return 0;
389 : }
390 :
391 :
392 : OUString SAL_CALL
393 0 : Collator_Unicode::getImplementationName() throw( RuntimeException, std::exception )
394 : {
395 0 : return OUString::createFromAscii(implementationName);
396 : }
397 :
398 : sal_Bool SAL_CALL
399 0 : Collator_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
400 : {
401 0 : return cppu::supportsService(this, rServiceName);
402 : }
403 :
404 : Sequence< OUString > SAL_CALL
405 0 : Collator_Unicode::getSupportedServiceNames() throw( RuntimeException, std::exception )
406 : {
407 0 : Sequence< OUString > aRet(1);
408 0 : aRet[0] = OUString::createFromAscii(implementationName);
409 0 : return aRet;
410 : }
411 :
412 : } } } }
413 :
414 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|