LCOV - code coverage report
Current view: top level - i18npool/source/collator - collator_unicode.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 32 80 40.0 %
Date: 2015-06-13 12:38:46 Functions: 6 10 60.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <config_locales.h>
      21             : 
      22             : #include "lrl_include.hxx"
      23             : 
      24             : #include <rtl/ustrbuf.hxx>
      25             : #include <i18nlangtag/languagetag.hxx>
      26             : #include <i18nlangtag/languagetagicu.hxx>
      27             : #include <collator_unicode.hxx>
      28             : #include <localedata.hxx>
      29             : #include <com/sun/star/i18n/CollatorOptions.hpp>
      30             : #include <cppuhelper/supportsservice.hxx>
      31             : 
      32             : using namespace ::com::sun::star;
      33             : using namespace ::com::sun::star::lang;
      34             : using namespace ::com::sun::star::uno;
      35             : 
      36             : namespace com { namespace sun { namespace star { namespace i18n {
      37             : 
      38          86 : Collator_Unicode::Collator_Unicode()
      39             : {
                           // A fresh instance owns no ICU collator and no data library yet;
                           // loadCollatorAlgorithm() fills these members in on first use.
      40             : #ifndef DISABLE_DYNLOADING
      41          86 :     hModule = NULL;
      42             : #endif
      43          86 :     uca_base = NULL;
      44          86 :     collator = NULL;
      45          86 :     implementationName = "com.sun.star.i18n.Collator_Unicode";
      46          86 : }
      47             : 
      48         201 : Collator_Unicode::~Collator_Unicode()
      49             : {
                           // Deleting a null pointer is a no-op, so no guards are needed here.
      50          67 :     delete collator;
      51          67 :     delete uca_base;
      52             : #ifndef DISABLE_DYNLOADING
                           // Release the data library only after the collators are gone, in case
                           // `collator` was built from a rule image obtained from this module
                           // (see loadCollatorAlgorithm()).
      53          67 :     if (hModule != NULL)
                               osl_unloadModule(hModule);
      54             : #endif
      55         134 : }
      56             : 
      57             : #ifdef DISABLE_DYNLOADING
      58             : 
      59             : extern "C" {
      60             : 
      61             : // For DISABLE_DYNLOADING the generated functions have names that
      62             : // start with get_collator_data_ to avoid clashing with a few
      63             : // functions in the generated libindex_data that are called just
      64             : // get_zh_pinyin for instance.
                       //
                       // The functions are named get_collator_data_<language>[_TW]_<algorithm>.
                       // Collator_Unicode::loadCollatorAlgorithm() picks one of them by locale
                       // and algorithm name and hands the returned pointer to RuleBasedCollator
                       // as the binary collation rule image.
      65             : 
      66             : const sal_uInt8* get_collator_data_ca_charset();
      67             : const sal_uInt8* get_collator_data_cu_charset();
      68             : const sal_uInt8* get_collator_data_dz_charset();
      69             : const sal_uInt8* get_collator_data_hu_charset();
      70             : const sal_uInt8* get_collator_data_ja_charset();
      71             : const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_first();
      72             : const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_last();
      73             : const sal_uInt8* get_collator_data_ko_charset();
      74             : const sal_uInt8* get_collator_data_ku_alphanumeric();
      75             : const sal_uInt8* get_collator_data_ln_charset();
      76             : const sal_uInt8* get_collator_data_my_dictionary();
      77             : const sal_uInt8* get_collator_data_ne_charset();
      78             : const sal_uInt8* get_collator_data_sid_charset();
      79             : const sal_uInt8* get_collator_data_zh_TW_charset();
      80             : const sal_uInt8* get_collator_data_zh_TW_radical();
      81             : const sal_uInt8* get_collator_data_zh_TW_stroke();
      82             : const sal_uInt8* get_collator_data_zh_charset();
      83             : const sal_uInt8* get_collator_data_zh_pinyin();
      84             : const sal_uInt8* get_collator_data_zh_radical();
      85             : const sal_uInt8* get_collator_data_zh_stroke();
      86             : const sal_uInt8* get_collator_data_zh_zhuyin();
      87             : 
                       // One <name>_length() function per getter above. loadCollatorAlgorithm()
                       // passes its result to RuleBasedCollator as the length of the rule image
                       // returned by the getter of the same name.
      88             : size_t get_collator_data_ca_charset_length();
      89             : size_t get_collator_data_cu_charset_length();
      90             : size_t get_collator_data_dz_charset_length();
      91             : size_t get_collator_data_hu_charset_length();
      92             : size_t get_collator_data_ja_charset_length();
      93             : size_t get_collator_data_ja_phonetic_alphanumeric_first_length();
      94             : size_t get_collator_data_ja_phonetic_alphanumeric_last_length();
      95             : size_t get_collator_data_ko_charset_length();
      96             : size_t get_collator_data_ku_alphanumeric_length();
      97             : size_t get_collator_data_ln_charset_length();
      98             : size_t get_collator_data_my_dictionary_length();
      99             : size_t get_collator_data_ne_charset_length();
     100             : size_t get_collator_data_sid_charset_length();
     101             : size_t get_collator_data_zh_TW_charset_length();
     102             : size_t get_collator_data_zh_TW_radical_length();
     103             : size_t get_collator_data_zh_TW_stroke_length();
     104             : size_t get_collator_data_zh_charset_length();
     105             : size_t get_collator_data_zh_pinyin_length();
     106             : size_t get_collator_data_zh_radical_length();
     107             : size_t get_collator_data_zh_stroke_length();
     108             : size_t get_collator_data_zh_zhuyin_length();
     109             : 
     110             : }
     111             : 
     112             : #endif
     113             : 
     114             : sal_Int32 SAL_CALL
     115         462 : Collator_Unicode::compareSubstring( const OUString& str1, sal_Int32 off1, sal_Int32 len1,
     116             :     const OUString& str2, sal_Int32 off2, sal_Int32 len2) throw(RuntimeException, std::exception)
     117             : {
                           // Compares len1 characters of str1 starting at off1 with len2 characters
                           // of str2 starting at off2, using the collator set up by
                           // loadCollatorAlgorithm(). Offsets and lengths are passed on unchecked,
                           // and `collator` is dereferenced unconditionally.
                           // UChar != sal_Unicode in MinGW, hence the casts.
                           const UChar* const pFirst = reinterpret_cast<const UChar *>(str1.getStr()) + off1;
                           const UChar* const pSecond = reinterpret_cast<const UChar *>(str2.getStr()) + off2;
     118         462 :     return collator->compare(pFirst, len1, pSecond, len2);
     119             : }
     120             : 
     121             : sal_Int32 SAL_CALL
     122       42784 : Collator_Unicode::compareString( const OUString& str1, const OUString& str2) throw(RuntimeException, std::exception)
     123             : {
                           // Compares the two complete strings with the collator set up by
                           // loadCollatorAlgorithm(); `collator` is dereferenced unconditionally.
                           //
                           // The lengths are passed explicitly so that the (pointer, length)
                           // overload is selected, the same one compareSubstring() uses. A call
                           // with two bare pointers can only bind to the UnicodeString overload,
                           // which builds two temporary strings per comparison and stops at the
                           // first U+0000 instead of honouring the OUString length.
     124       42784 :     return collator->compare(
                               reinterpret_cast<const UChar *>(str1.getStr()), str1.getLength(),
                               reinterpret_cast<const UChar *>(str2.getStr()), str2.getLength());   // UChar != sal_Unicode in MinGW
     125             : }
     126             : 
     127             : #ifndef DISABLE_DYNLOADING
     128             : 
     129           0 : extern "C" {
                       // Intentionally empty. Only its address is used in this file: it is handed
                       // to osl_loadModuleRelative() as the reference point for locating the
                       // collator_data library.
                       static void SAL_CALL thisModule()
                       {
                       }
                       }
     130             : 
     131             : #endif
     132             : 
     133             : sal_Int32 SAL_CALL
     134          86 : Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::Locale& rLocale, sal_Int32 options)
     135             :     throw(RuntimeException, std::exception)
     136             : {
                           // Sets up `collator` for rLocale / rAlgorithm and applies the comparison
                           // strength selected by `options`. Always returns 0.
                           //
                           // The collator is created only while `collator` is still NULL; later
                           // calls merely update the strength. Sources are tried in this order:
                           //   1. a rule string from LocaleDataImpl::getCollatorRuleByAlgorithm(),
                           //   2. for languages listed in LOCAL_RULE_LANGS, a binary rule image from
                           //      the collator_data library (or the statically linked
                           //      get_collator_data_* functions when DISABLE_DYNLOADING is set),
                           //   3. the ICU collator for the locale.
                           //
                           // Throws RuntimeException when ICU reports a failure. In that case the
                           // partially created objects are destroyed and the members reset to NULL,
                           // so a later call starts over instead of using a failed collator.
     137          86 :     if (!collator) {
     138          86 :         UErrorCode status = U_ZERO_ERROR;
     139          86 :         OUString rule = LocaleDataImpl().getCollatorRuleByAlgorithm(rLocale, rAlgorithm);
     140          86 :         if (!rule.isEmpty()) {
     141           0 :             collator = new RuleBasedCollator(reinterpret_cast<const UChar *>(rule.getStr()), status);   // UChar != sal_Unicode in MinGW
     142           0 :             if (! U_SUCCESS(status)) {
                                       // Do not keep a collator whose construction failed.
                                       delete collator;
                                       collator = NULL;
                                       throw RuntimeException();
                                   }
     143             :         }
     144          86 :         if (!collator && OUString(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
     145           0 :             const sal_uInt8* (*func)() = NULL;
     146           0 :             size_t (*funclen)() = NULL;
     147             : 
     148             : #ifndef DISABLE_DYNLOADING
     149           0 :             OUStringBuffer aBuf;
     150             : #ifdef SAL_DLLPREFIX
     151           0 :             aBuf.appendAscii(SAL_DLLPREFIX);
     152             : #endif
     153           0 :             aBuf.appendAscii( "collator_data" ).appendAscii( SAL_DLLEXTENSION );
                                   // Always drain the buffer (it is reused for the symbol name below),
                                   // but load the library only if an earlier call has not done so:
                                   // overwriting hModule would lose a handle the destructor must unload.
     154           0 :             const OUString aLibName = aBuf.makeStringAndClear();
                                   if (!hModule)
                                       hModule = osl_loadModuleRelative( &thisModule, aLibName.pData, SAL_LOADMODULE_DEFAULT );
     155           0 :             if (hModule) {
     156           0 :                 aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_");
     157           0 :                 if ( rLocale.Language == "zh" ) {
     158           0 :                     OUString func_base = aBuf.makeStringAndClear();
     159             :                     // The length function is named like the data function plus a
                                           // trailing "_length", so the suffix has to go after the
                                           // territory and algorithm parts, not after "get_zh_".
     160           0 :                     if (OUString("TW HK MO").indexOf(rLocale.Country) >= 0)
     161             :                     {
     162             :                         func = reinterpret_cast<const sal_uInt8* (*)()>(osl_getFunctionSymbol(hModule,
     163           0 :                                     OUString(func_base + "TW_" + rAlgorithm).pData));
     164             :                         funclen = reinterpret_cast<size_t (*)()>(osl_getFunctionSymbol(hModule,
     165           0 :                                     OUString(func_base + "TW_" + rAlgorithm + "_length").pData));
     166             :                     }
     167           0 :                     if (!func)
     168             :                     {
     169             :                         func = reinterpret_cast<const sal_uInt8* (*)()>(osl_getFunctionSymbol(
     170           0 :                                 hModule, OUString(func_base + rAlgorithm).pData));
     171             :                         funclen = reinterpret_cast<size_t (*)()>(osl_getFunctionSymbol(
     172           0 :                                 hModule, OUString(func_base + rAlgorithm + "_length").pData));
     173           0 :                     }
     174             :                 } else {
     175           0 :                     if ( rLocale.Language == "ja" ) {
     176             :                         // replace algorithm name to implementation name.
     177           0 :                         if (rAlgorithm == "phonetic (alphanumeric first)")
     178           0 :                             aBuf.appendAscii("phonetic_alphanumeric_first");
     179           0 :                         else if (rAlgorithm == "phonetic (alphanumeric last)")
     180           0 :                             aBuf.appendAscii("phonetic_alphanumeric_last");
     181             :                         else
     182           0 :                             aBuf.append(rAlgorithm);
     183             :                     } else {
     184           0 :                         aBuf.append(rAlgorithm);
     185             :                     }
     186           0 :                     OUString func_base = aBuf.makeStringAndClear();
     187           0 :                     OUString funclen_base = func_base + "_length";
     188           0 :                     func = reinterpret_cast<const sal_uInt8* (*)()>(osl_getFunctionSymbol(hModule, func_base.pData));
     189           0 :                     funclen = reinterpret_cast<size_t (*)()>(osl_getFunctionSymbol(hModule, funclen_base.pData));
     190             :                 }
     191             :             }
     192             : #else
     193             :             if (false) {
     194             :                 ;
     195             : #if WITH_LOCALE_ALL || WITH_LOCALE_ca
     196             :             } else if ( rLocale.Language == "ca" ) {
     197             :                 if ( rAlgorithm == "charset" )
     198             :                 {
     199             :                     func = get_collator_data_ca_charset;
     200             :                     funclen = get_collator_data_ca_charset_length;
     201             :                 }
     202             : #endif
     203             : #if WITH_LOCALE_ALL || WITH_LOCALE_cu
     204             :             } else if ( rLocale.Language == "cu" ) {
     205             :                 if ( rAlgorithm == "charset" )
     206             :                 {
     207             :                     func = get_collator_data_cu_charset;
     208             :                     funclen = get_collator_data_cu_charset_length;
     209             :                 }
     210             : #endif
     211             : #if WITH_LOCALE_ALL || WITH_LOCALE_dz
     212             :             } else if ( rLocale.Language == "dz" || rLocale.Language == "bo" ) {
     213             :                 // 'bo' Tibetan uses the same collation rules as 'dz' Dzongkha
     214             :                 if ( rAlgorithm == "charset" )
     215             :                 {
     216             :                     func = get_collator_data_dz_charset;
     217             :                     funclen = get_collator_data_dz_charset_length;
     218             :                 }
     219             : #endif
     220             : #if WITH_LOCALE_ALL || WITH_LOCALE_hu
     221             :             } else if ( rLocale.Language == "hu" ) {
     222             :                 if ( rAlgorithm == "charset" )
     223             :                 {
     224             :                     func = get_collator_data_hu_charset;
     225             :                     funclen = get_collator_data_hu_charset_length;
     226             :                 }
     227             : #endif
     228             : #if WITH_LOCALE_ALL || WITH_LOCALE_ja
     229             :             } else if ( rLocale.Language == "ja" ) {
     230             :                 if ( rAlgorithm == "charset" )
     231             :                 {
     232             :                     func = get_collator_data_ja_charset;
     233             :                     funclen = get_collator_data_ja_charset_length;
     234             :                 }
     235             :                 else if ( rAlgorithm == "phonetic (alphanumeric first)" )
     236             :                 {
     237             :                     func = get_collator_data_ja_phonetic_alphanumeric_first;
     238             :                     funclen = get_collator_data_ja_phonetic_alphanumeric_first_length;
     239             :                 }
     240             :                 else if ( rAlgorithm == "phonetic (alphanumeric last)" )
     241             :                 {
     242             :                     func = get_collator_data_ja_phonetic_alphanumeric_last;
     243             :                     funclen = get_collator_data_ja_phonetic_alphanumeric_last_length;
     244             :                 }
     245             : #endif
     246             : #if WITH_LOCALE_ALL || WITH_LOCALE_ko
     247             : #if (U_ICU_VERSION_MAJOR_NUM < 53)
     248             :             } else if ( rLocale.Language == "ko" ) {
     249             :                 if ( rAlgorithm == "charset" )
     250             :                 {
     251             :                     func = get_collator_data_ko_charset;
     252             :                     funclen = get_collator_data_ko_charset_length;
     253             :                 }
     254             : #endif
     255             : #endif
     256             : #if WITH_LOCALE_ALL || WITH_LOCALE_ku
     257             :             } else if ( rLocale.Language == "ku" ) {
     258             :                 if ( rAlgorithm == "alphanumeric" )
     259             :                 {
     260             :                     func = get_collator_data_ku_alphanumeric;
     261             :                     funclen = get_collator_data_ku_alphanumeric_length;
     262             :                 }
     263             : #endif
     264             : #if WITH_LOCALE_ALL || WITH_LOCALE_ln
     265             :             } else if ( rLocale.Language == "ln" ) {
     266             :                 if ( rAlgorithm == "charset" )
     267             :                 {
     268             :                     func = get_collator_data_ln_charset;
     269             :                     funclen = get_collator_data_ln_charset_length;
     270             :                 }
     271             : #endif
     272             : #if WITH_LOCALE_ALL || WITH_LOCALE_my
     273             :             } else if ( rLocale.Language == "my" ) {
     274             :                 if ( rAlgorithm == "dictionary" )
     275             :                 {
     276             :                     func = get_collator_data_my_dictionary;
     277             :                     funclen = get_collator_data_my_dictionary_length;
     278             :                 }
     279             : #endif
     280             : #if WITH_LOCALE_ALL || WITH_LOCALE_ne
     281             :             } else if ( rLocale.Language == "ne" ) {
     282             :                 if ( rAlgorithm == "charset" )
     283             :                 {
     284             :                     func = get_collator_data_ne_charset;
     285             :                     funclen = get_collator_data_ne_charset_length;
     286             :                 }
     287             : #endif
     288             : #if WITH_LOCALE_ALL || WITH_LOCALE_sid
     289             :             } else if ( rLocale.Language == "sid" ) {
     290             :                 if ( rAlgorithm == "charset" )
     291             :                 {
     292             :                     func = get_collator_data_sid_charset;
     293             :                     funclen = get_collator_data_sid_charset_length;
     294             :                 }
     295             : #endif
     296             : #if WITH_LOCALE_ALL || WITH_LOCALE_zh
     297             :             } else if ( rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO") ) {
     298             :                 if ( rAlgorithm == "charset" )
     299             :                 {
     300             :                     func = get_collator_data_zh_TW_charset;
     301             :                     funclen = get_collator_data_zh_TW_charset_length;
     302             :                 }
     303             :                 else if ( rAlgorithm == "radical" )
     304             :                 {
     305             :                     func = get_collator_data_zh_TW_radical;
     306             :                     funclen = get_collator_data_zh_TW_radical_length;
     307             :                 }
     308             :                 else if ( rAlgorithm == "stroke" )
     309             :                 {
     310             :                     func = get_collator_data_zh_TW_stroke;
     311             :                     funclen = get_collator_data_zh_TW_stroke_length;
     312             :                 }
     313             :             } else if ( rLocale.Language == "zh" ) {
     314             :                 if ( rAlgorithm == "charset" )
     315             :                 {
     316             :                     func = get_collator_data_zh_charset;
     317             :                     funclen = get_collator_data_zh_charset_length;
     318             :                 }
     319             :                 else if ( rAlgorithm == "pinyin" )
     320             :                 {
     321             :                     func = get_collator_data_zh_pinyin;
     322             :                     funclen = get_collator_data_zh_pinyin_length;
     323             :                 }
     324             :                 else if ( rAlgorithm == "radical" )
     325             :                 {
     326             :                     func = get_collator_data_zh_radical;
     327             :                     funclen = get_collator_data_zh_radical_length;
     328             :                 }
     329             :                 else if ( rAlgorithm == "stroke" )
     330             :                 {
     331             :                     func = get_collator_data_zh_stroke;
     332             :                     funclen = get_collator_data_zh_stroke_length;
     333             :                 }
     334             :                 else if ( rAlgorithm == "zhuyin" )
     335             :                 {
     336             :                     func = get_collator_data_zh_zhuyin;
     337             :                     funclen = get_collator_data_zh_zhuyin_length;
     338             :                 }
     339             : #endif
     340             :             }
     341             : #endif // DISABLE_DYNLOADING
     342           0 :             if (func && funclen) {
     343           0 :                 const sal_uInt8* ruleImage=func();
     344           0 :                 size_t ruleImageSize = funclen();
     345             : 
     346             : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
     347             :                 uca_base = new RuleBasedCollator(static_cast<UChar*>(NULL), status);
     348             : #else
     349             :                 // Not only changed ICU 53.1 the API behavior that a negative
     350             :                 // length (ruleImageSize) now leads to failure, but also that
     351             :                 // the base RuleBasedCollator passed as uca_base here needs to
     352             :                 // have a base->tailoring == CollationRoot::getRoot() otherwise
     353             :                 // the init bails out as well, as it does for the previously
     354             :                 // used "empty" RuleBasedCollator.
     355             :                 // The default collator of the en-US locale would also fulfill
     356             :                 // the requirement. The collator of the actual locale or the
     357             :                 // NULL (default) locale does not.
     358             :                 uca_base = static_cast<RuleBasedCollator*>(icu::Collator::createInstance(
     359           0 :                             icu::Locale::getRoot(), status));
     360             : #endif
     361           0 :                 if (! U_SUCCESS(status)) {
                                           delete uca_base;
                                           uca_base = NULL;
                                           throw RuntimeException();
                                       }
     362             :                 collator = new RuleBasedCollator(
     363           0 :                         reinterpret_cast<const uint8_t*>(ruleImage), ruleImageSize, uca_base, status);
     364           0 :                 if (! U_SUCCESS(status)) {
                                           // Destroy the tailored collator before the base it was given.
                                           delete collator;
                                           collator = NULL;
                                           delete uca_base;
                                           uca_base = NULL;
                                           throw RuntimeException();
                                       }
     365           0 :             }
     366             :         }
     367          86 :         if (!collator) {
     368             :             /** ICU collators are loaded using a locale only.
     369             :                 ICU uses Variant as collation algorithm name (like de__PHONEBOOK
     370             :                 locale), note the empty territory (Country) designator in this special
     371             :                 case here. The icu::Locale constructor changes the algorithm name to
     372             :                 uppercase itself, so we don't have to bother with that.
     373             :             */
     374          86 :             icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale), rAlgorithm));
     375             :             // load ICU collator
     376          86 :             collator = static_cast<RuleBasedCollator*>( icu::Collator::createInstance(icuLocale, status) );
     377          86 :             if (! U_SUCCESS(status)) {
                                       delete collator;
                                       collator = NULL;
                                       throw RuntimeException();
                                   }
     378          86 :         }
     379             :     }
     380             : 
     381          86 :     if (options & CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)
     382           5 :         collator->setStrength(Collator::PRIMARY);
     383          81 :     else if (options & CollatorOptions::CollatorOptions_IGNORE_CASE)
     384          22 :         collator->setStrength(Collator::SECONDARY);
     385             :     else
     386          59 :         collator->setStrength(Collator::TERTIARY);
     387             : 
     388          86 :     return 0;
     389             : }
     390             : 
     391             : 
     392             : OUString SAL_CALL
     393           0 : Collator_Unicode::getImplementationName() throw( RuntimeException, std::exception )
     394             : {
                           // The name is kept as a plain ASCII C string (set in the constructor)
                           // and converted on every call.
     395           0 :     OUString aName( OUString::createFromAscii(implementationName) );
                           return aName;
     396             : }
     397             : 
     398             : sal_Bool SAL_CALL
     399           0 : Collator_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
     400             : {
                           // Delegates the decision to the shared cppu helper.
     401           0 :     const bool bSupported = cppu::supportsService(this, rServiceName);
                           return bSupported;
     402             : }
     403             : 
     404             : Sequence< OUString > SAL_CALL
     405           0 : Collator_Unicode::getSupportedServiceNames() throw( RuntimeException, std::exception )
     406             : {
     407             :     // The implementation name is reported as the one and only service name.
     408           0 :     const OUString aOnlyName( OUString::createFromAscii(implementationName) );
     409           0 :     return Sequence< OUString >( &aOnlyName, 1 );
     410             : }
     411             : 
     412             : } } } }
     413             : 
     414             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11