LCOV - code coverage report
Current view: top level - i18npool/source/collator - collator_unicode.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 32 80 40.0 %
Date: 2014-11-03 Functions: 6 10 60.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <config_locales.h>
      21             : 
      22             : #include "lrl_include.hxx"
      23             : 
      24             : #include <rtl/ustrbuf.hxx>
      25             : #include <i18nlangtag/languagetag.hxx>
      26             : #include <i18nlangtag/languagetagicu.hxx>
      27             : #include <collator_unicode.hxx>
      28             : #include <localedata.hxx>
      29             : #include <com/sun/star/i18n/CollatorOptions.hpp>
      30             : #include <cppuhelper/supportsservice.hxx>
      31             : 
      32             : using namespace ::com::sun::star;
      33             : using namespace ::com::sun::star::lang;
      34             : using namespace ::com::sun::star::uno;
      35             : 
      36             : namespace com { namespace sun { namespace star { namespace i18n {
      37             : 
      38         157 : Collator_Unicode::Collator_Unicode()
      39             : {
      40         157 :     implementationName = "com.sun.star.i18n.Collator_Unicode";
      41         157 :     collator = NULL;
      42         157 :     uca_base = NULL;
      43             : #ifndef DISABLE_DYNLOADING
      44         157 :     hModule = NULL;
      45             : #endif
      46         157 : }
      47             : 
      48         357 : Collator_Unicode::~Collator_Unicode()
      49             : {
      50         119 :     if (collator) delete collator;
      51         119 :     if (uca_base) delete uca_base;
      52             : #ifndef DISABLE_DYNLOADING
      53         119 :     if (hModule) osl_unloadModule(hModule);
      54             : #endif
      55         238 : }
      56             : 
      57             : #ifdef DISABLE_DYNLOADING
      58             : 
      59             : extern "C" {
      60             : 
      61             : // For DISABLE_DYNLOADING the generated functions have names that
      62             : // start with get_collator_data_ to avoid clashing with a few
      63             : // functions in the generated libindex_data that are called just
      64             : // get_zh_pinyin for instance.
      65             : 
      66             : const sal_uInt8* get_collator_data_ca_charset();
      67             : const sal_uInt8* get_collator_data_dz_charset();
      68             : const sal_uInt8* get_collator_data_hu_charset();
      69             : const sal_uInt8* get_collator_data_ja_charset();
      70             : const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_first();
      71             : const sal_uInt8* get_collator_data_ja_phonetic_alphanumeric_last();
      72             : const sal_uInt8* get_collator_data_ko_charset();
      73             : const sal_uInt8* get_collator_data_ku_alphanumeric();
      74             : const sal_uInt8* get_collator_data_ln_charset();
      75             : const sal_uInt8* get_collator_data_my_dictionary();
      76             : const sal_uInt8* get_collator_data_ne_charset();
      77             : const sal_uInt8* get_collator_data_sid_charset();
      78             : const sal_uInt8* get_collator_data_zh_TW_charset();
      79             : const sal_uInt8* get_collator_data_zh_TW_radical();
      80             : const sal_uInt8* get_collator_data_zh_TW_stroke();
      81             : const sal_uInt8* get_collator_data_zh_charset();
      82             : const sal_uInt8* get_collator_data_zh_pinyin();
      83             : const sal_uInt8* get_collator_data_zh_radical();
      84             : const sal_uInt8* get_collator_data_zh_stroke();
      85             : const sal_uInt8* get_collator_data_zh_zhuyin();
      86             : 
      87             : size_t get_collator_data_ca_charset_length();
      88             : size_t get_collator_data_dz_charset_length();
      89             : size_t get_collator_data_hu_charset_length();
      90             : size_t get_collator_data_ja_charset_length();
      91             : size_t get_collator_data_ja_phonetic_alphanumeric_first_length();
      92             : size_t get_collator_data_ja_phonetic_alphanumeric_last_length();
      93             : size_t get_collator_data_ko_charset_length();
      94             : size_t get_collator_data_ku_alphanumeric_length();
      95             : size_t get_collator_data_ln_charset_length();
      96             : size_t get_collator_data_my_dictionary_length();
      97             : size_t get_collator_data_ne_charset_length();
      98             : size_t get_collator_data_sid_charset_length();
      99             : size_t get_collator_data_zh_TW_charset_length();
     100             : size_t get_collator_data_zh_TW_radical_length();
     101             : size_t get_collator_data_zh_TW_stroke_length();
     102             : size_t get_collator_data_zh_charset_length();
     103             : size_t get_collator_data_zh_pinyin_length();
     104             : size_t get_collator_data_zh_radical_length();
     105             : size_t get_collator_data_zh_stroke_length();
     106             : size_t get_collator_data_zh_zhuyin_length();
     107             : 
     108             : }
     109             : 
     110             : #endif
     111             : 
     112             : sal_Int32 SAL_CALL
     113        1593 : Collator_Unicode::compareSubstring( const OUString& str1, sal_Int32 off1, sal_Int32 len1,
     114             :     const OUString& str2, sal_Int32 off2, sal_Int32 len2) throw(RuntimeException, std::exception)
     115             : {
     116        1593 :     return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()) + off1, len1, reinterpret_cast<const UChar *>(str2.getStr()) + off2, len2); // UChar != sal_Unicode in MinGW
     117             : }
     118             : 
     119             : sal_Int32 SAL_CALL
     120       85318 : Collator_Unicode::compareString( const OUString& str1, const OUString& str2) throw(RuntimeException, std::exception)
     121             : {
     122       85318 :     return collator->compare(reinterpret_cast<const UChar *>(str1.getStr()), reinterpret_cast<const UChar *>(str2.getStr()));   // UChar != sal_Unicode in MinGW
     123             : }
     124             : 
     125             : #ifndef DISABLE_DYNLOADING
     126             : 
     127           0 : extern "C" { static void SAL_CALL thisModule() {} }
     128             : 
     129             : #endif
     130             : 
     131             : sal_Int32 SAL_CALL
     132         157 : Collator_Unicode::loadCollatorAlgorithm(const OUString& rAlgorithm, const lang::Locale& rLocale, sal_Int32 options)
     133             :     throw(RuntimeException, std::exception)
     134             : {
     135         157 :     if (!collator) {
     136         157 :         UErrorCode status = U_ZERO_ERROR;
     137         157 :         OUString rule = LocaleDataImpl().getCollatorRuleByAlgorithm(rLocale, rAlgorithm);
     138         157 :         if (!rule.isEmpty()) {
     139           0 :             collator = new RuleBasedCollator(reinterpret_cast<const UChar *>(rule.getStr()), status);   // UChar != sal_Unicode in MinGW
     140           0 :             if (! U_SUCCESS(status)) throw RuntimeException();
     141             :         }
     142         157 :         if (!collator && OUString::createFromAscii(LOCAL_RULE_LANGS).indexOf(rLocale.Language) >= 0) {
     143           0 :             const sal_uInt8* (*func)() = NULL;
     144           0 :             size_t (*funclen)() = NULL;
     145             : 
     146             : #ifndef DISABLE_DYNLOADING
     147           0 :             OUStringBuffer aBuf;
     148             : #ifdef SAL_DLLPREFIX
     149           0 :             aBuf.appendAscii(SAL_DLLPREFIX);
     150             : #endif
     151           0 :             aBuf.appendAscii( "collator_data" ).appendAscii( SAL_DLLEXTENSION );
     152           0 :             hModule = osl_loadModuleRelative( &thisModule, aBuf.makeStringAndClear().pData, SAL_LOADMODULE_DEFAULT );
     153           0 :             if (hModule) {
     154           0 :                 aBuf.appendAscii("get_").append(rLocale.Language).appendAscii("_");
     155           0 :                 if ( rLocale.Language == "zh" ) {
     156           0 :                     OUString func_base = aBuf.makeStringAndClear();
     157           0 :                     OUString funclen_base = func_base + "_length";
     158           0 :                     if (OUString("TW HK MO").indexOf(rLocale.Country) >= 0)
     159             :                     {
     160             :                         func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule,
     161           0 :                                     OUString(func_base + "TW_" + rAlgorithm).pData);
     162             :                         funclen = (size_t (*)()) osl_getFunctionSymbol(hModule,
     163           0 :                                     OUString(funclen_base + "TW_" + rAlgorithm).pData);
     164             :                     }
     165           0 :                     if (!func)
     166             :                     {
     167             :                         func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(
     168           0 :                                 hModule, OUString(func_base + rAlgorithm).pData);
     169             :                         funclen = (size_t (*)()) osl_getFunctionSymbol(
     170           0 :                                 hModule, OUString(funclen_base + rAlgorithm).pData);
     171           0 :                     }
     172             :                 } else {
     173           0 :                     if ( rLocale.Language == "ja" ) {
     174             :                         // replace algorithm name to implementation name.
     175           0 :                         if (rAlgorithm == "phonetic (alphanumeric first)")
     176           0 :                             aBuf.appendAscii("phonetic_alphanumeric_first");
     177           0 :                         else if (rAlgorithm == "phonetic (alphanumeric last)")
     178           0 :                             aBuf.appendAscii("phonetic_alphanumeric_last");
     179             :                         else
     180           0 :                             aBuf.append(rAlgorithm);
     181             :                     } else {
     182           0 :                         aBuf.append(rAlgorithm);
     183             :                     }
     184           0 :                     OUString func_base = aBuf.makeStringAndClear();
     185           0 :                     OUString funclen_base = func_base + "_length";
     186           0 :                     func = (const sal_uInt8* (*)()) osl_getFunctionSymbol(hModule, func_base.pData);
     187           0 :                     funclen = (size_t (*)()) osl_getFunctionSymbol(hModule, funclen_base.pData);
     188             :                 }
     189             :             }
     190             : #else
     191             :             if (false) {
     192             :                 ;
     193             : #if WITH_LOCALE_ALL || WITH_LOCALE_ca
     194             :             } else if ( rLocale.Language == "ca" ) {
     195             :                 if ( rAlgorithm == "charset" )
     196             :                 {
     197             :                     func = get_collator_data_ca_charset;
     198             :                     funclen = get_collator_data_ca_charset_length;
     199             :                 }
     200             : #endif
     201             : #if WITH_LOCALE_ALL || WITH_LOCALE_dz
     202             :             } else if ( rLocale.Language == "dz" || rLocale.Language == "bo" ) {
     203             :                 // 'bo' Tibetan uses the same collation rules as 'dz' Dzongkha
     204             :                 if ( rAlgorithm == "charset" )
     205             :                 {
     206             :                     func = get_collator_data_dz_charset;
     207             :                     funclen = get_collator_data_dz_charset_length;
     208             :                 }
     209             : #endif
     210             : #if WITH_LOCALE_ALL || WITH_LOCALE_hu
     211             :             } else if ( rLocale.Language == "hu" ) {
     212             :                 if ( rAlgorithm == "charset" )
     213             :                 {
     214             :                     func = get_collator_data_hu_charset;
     215             :                     funclen = get_collator_data_hu_charset_length;
     216             :                 }
     217             : #endif
     218             : #if WITH_LOCALE_ALL || WITH_LOCALE_ja
     219             :             } else if ( rLocale.Language == "ja" ) {
     220             :                 if ( rAlgorithm == "charset" )
     221             :                 {
     222             :                     func = get_collator_data_ja_charset;
     223             :                     funclen = get_collator_data_ja_charset_length;
     224             :                 }
     225             :                 else if ( rAlgorithm == "phonetic (alphanumeric first)" )
     226             :                 {
     227             :                     func = get_collator_data_ja_phonetic_alphanumeric_first;
     228             :                     funclen = get_collator_data_ja_phonetic_alphanumeric_first_length;
     229             :                 }
     230             :                 else if ( rAlgorithm == "phonetic (alphanumeric last)" )
     231             :                 {
     232             :                     func = get_collator_data_ja_phonetic_alphanumeric_last;
     233             :                     funclen = get_collator_data_ja_phonetic_alphanumeric_last_length;
     234             :                 }
     235             : #endif
     236             : #if WITH_LOCALE_ALL || WITH_LOCALE_ko
     237             : #if (U_ICU_VERSION_MAJOR_NUM < 53)
     238             :             } else if ( rLocale.Language == "ko" ) {
     239             :                 if ( rAlgorithm == "charset" )
     240             :                 {
     241             :                     func = get_collator_data_ko_charset;
     242             :                     funclen = get_collator_data_ko_charset_length;
     243             :                 }
     244             : #endif
     245             : #endif
     246             : #if WITH_LOCALE_ALL || WITH_LOCALE_ku
     247             :             } else if ( rLocale.Language == "ku" ) {
     248             :                 if ( rAlgorithm == "alphanumeric" )
     249             :                 {
     250             :                     func = get_collator_data_ku_alphanumeric;
     251             :                     funclen = get_collator_data_ku_alphanumeric_length;
     252             :                 }
     253             : #endif
     254             : #if WITH_LOCALE_ALL || WITH_LOCALE_ln
     255             :             } else if ( rLocale.Language == "ln" ) {
     256             :                 if ( rAlgorithm == "charset" )
     257             :                 {
     258             :                     func = get_collator_data_ln_charset;
     259             :                     funclen = get_collator_data_ln_charset_length;
     260             :                 }
     261             : #endif
     262             : #if WITH_LOCALE_ALL || WITH_LOCALE_my
     263             :             } else if ( rLocale.Language == "my" ) {
     264             :                 if ( rAlgorithm == "dictionary" )
     265             :                 {
     266             :                     func = get_collator_data_my_dictionary;
     267             :                     funclen = get_collator_data_my_dictionary_length;
     268             :                 }
     269             : #endif
     270             : #if WITH_LOCALE_ALL || WITH_LOCALE_ne
     271             :             } else if ( rLocale.Language == "ne" ) {
     272             :                 if ( rAlgorithm == "charset" )
     273             :                 {
     274             :                     func = get_collator_data_ne_charset;
     275             :                     funclen = get_collator_data_ne_charset_length;
     276             :                 }
     277             : #endif
     278             : #if WITH_LOCALE_ALL || WITH_LOCALE_sid
     279             :             } else if ( rLocale.Language == "sid" ) {
     280             :                 if ( rAlgorithm == "charset" )
     281             :                 {
     282             :                     func = get_collator_data_sid_charset;
     283             :                     funclen = get_collator_data_sid_charset_length;
     284             :                 }
     285             : #endif
     286             : #if WITH_LOCALE_ALL || WITH_LOCALE_zh
     287             :             } else if ( rLocale.Language == "zh" && (rLocale.Country == "TW" || rLocale.Country == "HK" || rLocale.Country == "MO") ) {
     288             :                 if ( rAlgorithm == "charset" )
     289             :                 {
     290             :                     func = get_collator_data_zh_TW_charset;
     291             :                     funclen = get_collator_data_zh_TW_charset_length;
     292             :                 }
     293             :                 else if ( rAlgorithm == "radical" )
     294             :                 {
     295             :                     func = get_collator_data_zh_TW_radical;
     296             :                     funclen = get_collator_data_zh_TW_radical_length;
     297             :                 }
     298             :                 else if ( rAlgorithm == "stroke" )
     299             :                 {
     300             :                     func = get_collator_data_zh_TW_stroke;
     301             :                     funclen = get_collator_data_zh_TW_stroke_length;
     302             :                 }
     303             :             } else if ( rLocale.Language == "zh" ) {
     304             :                 if ( rAlgorithm == "charset" )
     305             :                 {
     306             :                     func = get_collator_data_zh_charset;
     307             :                     funclen = get_collator_data_zh_charset_length;
     308             :                 }
     309             :                 else if ( rAlgorithm == "pinyin" )
     310             :                 {
     311             :                     func = get_collator_data_zh_pinyin;
     312             :                     funclen = get_collator_data_zh_pinyin_length;
     313             :                 }
     314             :                 else if ( rAlgorithm == "radical" )
     315             :                 {
     316             :                     func = get_collator_data_zh_radical;
     317             :                     funclen = get_collator_data_zh_radical_length;
     318             :                 }
     319             :                 else if ( rAlgorithm == "stroke" )
     320             :                 {
     321             :                     func = get_collator_data_zh_stroke;
     322             :                     funclen = get_collator_data_zh_stroke_length;
     323             :                 }
     324             :                 else if ( rAlgorithm == "zhuyin" )
     325             :                 {
     326             :                     func = get_collator_data_zh_zhuyin;
     327             :                     funclen = get_collator_data_zh_zhuyin_length;
     328             :                 }
     329             : #endif
     330             :             }
     331             : #endif // DISABLE_DYNLOADING
     332           0 :             if (func && funclen) {
     333           0 :                 const sal_uInt8* ruleImage=func();
     334           0 :                 size_t ruleImageSize = funclen();
     335             : 
     336             : #if (U_ICU_VERSION_MAJOR_NUM == 4) && (U_ICU_VERSION_MINOR_NUM <= 2)
     337             :                 uca_base = new RuleBasedCollator(static_cast<UChar*>(NULL), status);
     338             : #else
     339             :                 // Not only changed ICU 53.1 the API behavior that a negative
     340             :                 // length (ruleImageSize) now leads to failure, but also that
     341             :                 // the base RuleBasedCollator passed as uca_base here needs to
     342             :                 // have a base->tailoring == CollationRoot::getRoot() otherwise
     343             :                 // the init bails out as well, as it does for the previously
     344             :                 // used "empty" RuleBasedCollator.
     345             :                 // The default collator of the en-US locale would also fulfill
     346             :                 // the requirement. The collator of the actual locale or the
     347             :                 // NULL (default) locale does not.
     348             :                 uca_base = static_cast<RuleBasedCollator*>(icu::Collator::createInstance(
     349           0 :                             icu::Locale::getRoot(), status));
     350             : #endif
     351           0 :                 if (! U_SUCCESS(status)) throw RuntimeException();
     352             :                 collator = new RuleBasedCollator(
     353           0 :                         reinterpret_cast<const uint8_t*>(ruleImage), ruleImageSize, uca_base, status);
     354           0 :                 if (! U_SUCCESS(status)) throw RuntimeException();
     355           0 :             }
     356             :         }
     357         157 :         if (!collator) {
     358             :             /** ICU collators are loaded using a locale only.
     359             :                 ICU uses Variant as collation algorithm name (like de__PHONEBOOK
     360             :                 locale), note the empty territory (Country) designator in this special
     361             :                 case here. The icu::Locale constructor changes the algorithm name to
     362             :                 uppercase itself, so we don't have to bother with that.
     363             :             */
     364         157 :             icu::Locale icuLocale( LanguageTagIcu::getIcuLocale( LanguageTag( rLocale), rAlgorithm));
     365             :             // load ICU collator
     366         157 :             collator = static_cast<RuleBasedCollator*>( icu::Collator::createInstance(icuLocale, status) );
     367         157 :             if (! U_SUCCESS(status)) throw RuntimeException();
     368         157 :         }
     369             :     }
     370             : 
     371         157 :     if (options & CollatorOptions::CollatorOptions_IGNORE_CASE_ACCENT)
     372          10 :         collator->setStrength(Collator::PRIMARY);
     373         147 :     else if (options & CollatorOptions::CollatorOptions_IGNORE_CASE)
     374          42 :         collator->setStrength(Collator::SECONDARY);
     375             :     else
     376         105 :         collator->setStrength(Collator::TERTIARY);
     377             : 
     378         157 :     return(0);
     379             : }
     380             : 
     381             : 
     382             : OUString SAL_CALL
     383           0 : Collator_Unicode::getImplementationName() throw( RuntimeException, std::exception )
     384             : {
     385           0 :     return OUString::createFromAscii(implementationName);
     386             : }
     387             : 
     388             : sal_Bool SAL_CALL
     389           0 : Collator_Unicode::supportsService(const OUString& rServiceName) throw( RuntimeException, std::exception )
     390             : {
     391           0 :     return cppu::supportsService(this, rServiceName);
     392             : }
     393             : 
     394             : Sequence< OUString > SAL_CALL
     395           0 : Collator_Unicode::getSupportedServiceNames() throw( RuntimeException, std::exception )
     396             : {
     397           0 :     Sequence< OUString > aRet(1);
     398           0 :     aRet[0] = OUString::createFromAscii(implementationName);
     399           0 :     return aRet;
     400             : }
     401             : 
     402             : } } } }
     403             : 
     404             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10