LCOV - code coverage report
Current view: top level - i18npool/source/textconversion - textconversion_zh.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 41 139 29.5 %
Date: 2015-06-13 12:38:46 Functions: 6 8 75.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : 
#include <assert.h>
#include <textconversion.hxx>
#include <com/sun/star/i18n/TextConversionType.hpp>
#include <com/sun/star/i18n/TextConversionOption.hpp>
#include <com/sun/star/linguistic2/ConversionDirection.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryType.hpp>
#include <com/sun/star/linguistic2/ConversionDictionaryList.hpp>
#include <comphelper/string.hxx>
#include <boost/scoped_array.hpp>
#include <vector>
      30             : 
      31             : using namespace com::sun::star::lang;
      32             : using namespace com::sun::star::i18n;
      33             : using namespace com::sun::star::linguistic2;
      34             : using namespace com::sun::star::uno;
      35             : 
      36             : 
      37             : namespace com { namespace sun { namespace star { namespace i18n {
      38             : 
      39           2 : TextConversion_zh::TextConversion_zh( const Reference < XComponentContext >& xContext )
      40           2 :     : TextConversionService("com.sun.star.i18n.TextConversion_zh")
      41             : {
      42           2 :     xCDL = ConversionDictionaryList::create(xContext);
      43           2 : }
      44             : 
      45           4 : sal_Unicode SAL_CALL getOneCharConversion(sal_Unicode ch, const sal_Unicode* Data, const sal_uInt16* Index)
      46             : {
      47           4 :     if (Data && Index) {
      48           4 :         sal_Unicode address = Index[ch>>8];
      49           4 :         if (address != 0xFFFF)
      50           4 :             address = Data[address + (ch & 0xFF)];
      51           4 :         return (address != 0xFFFF) ? address : ch;
      52             :     } else {
      53           0 :         return ch;
      54             :     }
      55             : }
      56             : 
      57             : #ifdef DISABLE_DYNLOADING
      58             : 
      59             : extern "C" {
      60             : 
      61             : const sal_Unicode* getSTC_CharData_T2S();
      62             : const sal_uInt16* getSTC_CharIndex_T2S();
      63             : const sal_Unicode* getSTC_CharData_S2V();
      64             : const sal_uInt16* getSTC_CharIndex_S2V();
      65             : const sal_Unicode* getSTC_CharData_S2T();
      66             : const sal_uInt16* getSTC_CharIndex_S2T();
      67             : 
      68             : const sal_Unicode *getSTC_WordData(sal_Int32&);
      69             : 
      70             : const sal_uInt16 *getSTC_WordIndex_T2S(sal_Int32&);
      71             : const sal_uInt16 *getSTC_WordEntry_T2S();
      72             : const sal_uInt16 *getSTC_WordIndex_S2T(sal_Int32&);
      73             : const sal_uInt16 *getSTC_WordEntry_S2T();
      74             : 
      75             : }
      76             : 
      77             : #endif
      78             : 
      79             : OUString SAL_CALL
      80           4 : TextConversion_zh::getCharConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions)
      81             : {
      82             :     const sal_Unicode *Data;
      83             :     const sal_uInt16 *Index;
      84             : 
      85             : #ifndef DISABLE_DYNLOADING
      86           4 :     if (toSChinese) {
      87           2 :         Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
      88           2 :         Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
      89           2 :     } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
      90           2 :         Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
      91           2 :         Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
      92             :     } else {
      93           0 :         Data = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
      94           0 :         Index = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
      95             :     }
      96             : #else
      97             :     if (toSChinese) {
      98             :         Data = getSTC_CharData_T2S();
      99             :         Index = getSTC_CharIndex_T2S();
     100             :     } else if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
     101             :         Data = getSTC_CharData_S2V();
     102             :         Index = getSTC_CharIndex_S2V();
     103             :     } else {
     104             :         Data = getSTC_CharData_S2T();
     105             :         Index = getSTC_CharIndex_S2T();
     106             :     }
     107             : #endif
     108             : 
     109           4 :     rtl_uString * newStr = rtl_uString_alloc(nLength);
     110           8 :     for (sal_Int32 i = 0; i < nLength; i++)
     111             :         newStr->buffer[i] =
     112           4 :             getOneCharConversion(aText[nStartPos+i], Data, Index);
     113           4 :     return OUString(newStr, SAL_NO_ACQUIRE); //take ownership
     114             : }
     115             : 
     116             : OUString SAL_CALL
     117           0 : TextConversion_zh::getWordConversion(const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength, bool toSChinese, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
     118             : {
     119           0 :     sal_Int32 dictLen = 0;
     120           0 :     sal_Int32 maxLen = 0;
     121             :     const sal_uInt16 *index;
     122             :     const sal_uInt16 *entry;
     123             :     const sal_Unicode *charData;
     124             :     const sal_uInt16 *charIndex;
     125           0 :     bool one2one=true;
     126             : 
     127             : #ifndef DISABLE_DYNLOADING
     128           0 :     const sal_Unicode *wordData = reinterpret_cast<const sal_Unicode* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordData"))(dictLen);
     129           0 :     if (toSChinese) {
     130           0 :         index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_T2S"))(maxLen);
     131           0 :         entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_T2S"))();
     132           0 :         charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_T2S"))();
     133           0 :         charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_T2S"))();
     134             :     } else {
     135           0 :         index = reinterpret_cast<const sal_uInt16* (*)(sal_Int32&)>(getFunctionBySymbol("getSTC_WordIndex_S2T"))(maxLen);
     136           0 :         entry = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_WordEntry_S2T"))();
     137           0 :         if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
     138           0 :             charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2V"))();
     139           0 :             charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2V"))();
     140             :         } else {
     141           0 :             charData = reinterpret_cast<const sal_Unicode* (*)()>(getFunctionBySymbol("getSTC_CharData_S2T"))();
     142           0 :             charIndex = reinterpret_cast<const sal_uInt16* (*)()>(getFunctionBySymbol("getSTC_CharIndex_S2T"))();
     143             :         }
     144             :     }
     145             : #else
     146             :     const sal_Unicode *wordData = getSTC_WordData(dictLen);
     147             :     if (toSChinese) {
     148             :         index = getSTC_WordIndex_T2S(maxLen);
     149             :         entry = getSTC_WordEntry_T2S();
     150             :         charData = getSTC_CharData_T2S();
     151             :         charIndex = getSTC_CharIndex_T2S();
     152             :     } else {
     153             :         index = getSTC_WordIndex_S2T(maxLen);
     154             :         entry = getSTC_WordEntry_S2T();
     155             :         if (nConversionOptions & TextConversionOption::USE_CHARACTER_VARIANTS) {
     156             :             charData = getSTC_CharData_S2V();
     157             :             charIndex = getSTC_CharIndex_S2V();
     158             :         } else {
     159             :             charData = getSTC_CharData_S2T();
     160             :             charIndex = getSTC_CharIndex_S2T();
     161             :         }
     162             :     }
     163             : #endif
     164             : 
     165           0 :     if ((!wordData || !index || !entry) && !xCDL.is()) // no word mapping defined, do char2char conversion.
     166           0 :         return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
     167             : 
     168           0 :     boost::scoped_array<sal_Unicode> newStr(new sal_Unicode[nLength * 2 + 1]);
     169           0 :     sal_Int32 currPos = 0, count = 0;
     170           0 :     while (currPos < nLength) {
     171           0 :         sal_Int32 len = nLength - currPos;
     172           0 :         bool found = false;
     173           0 :         if (len > maxLen)
     174           0 :             len = maxLen;
     175           0 :         for (; len > 0 && ! found; len--) {
     176           0 :             OUString word = aText.copy(nStartPos + currPos, len);
     177           0 :             sal_Int32 current = 0;
     178             :             // user dictionary
     179           0 :             if (xCDL.is()) {
     180           0 :                 Sequence < OUString > conversions;
     181             :                 try {
     182           0 :                     conversions = xCDL->queryConversions(word, 0, len,
     183             :                             aLocale, ConversionDictionaryType::SCHINESE_TCHINESE,
     184             :                             /*toSChinese ?*/ ConversionDirection_FROM_LEFT /*: ConversionDirection_FROM_RIGHT*/,
     185           0 :                             nConversionOptions);
     186             :                 }
     187           0 :                 catch ( NoSupportException & ) {
     188             :                     // clear reference (when there is no user dictionary) in order
     189             :                     // to not always have to catch this exception again
     190             :                     // in further calls. (save time)
     191           0 :                     xCDL = 0;
     192             :                 }
     193           0 :                 catch (...) {
     194             :                     // catch all other exceptions to allow
     195             :                     // querying the system dictionary in the next line
     196             :                 }
     197           0 :                 if (conversions.getLength() > 0) {
     198           0 :                     if (offset.getLength() > 0) {
     199           0 :                         if (word.getLength() != conversions[0].getLength())
     200           0 :                             one2one=false;
     201           0 :                         while (current < conversions[0].getLength()) {
     202           0 :                             offset[count] = nStartPos + currPos + (current *
     203           0 :                                     word.getLength() / conversions[0].getLength());
     204           0 :                             newStr[count++] = conversions[0][current++];
     205             :                         }
     206             :                         // offset[count-1] = nStartPos + currPos + word.getLength() - 1;
     207             :                     } else {
     208           0 :                         while (current < conversions[0].getLength())
     209           0 :                             newStr[count++] = conversions[0][current++];
     210             :                     }
     211           0 :                     currPos += word.getLength();
     212           0 :                     found = true;
     213           0 :                 }
     214             :             }
     215             : 
     216           0 :             if (!found && index[len+1] - index[len] > 0) {
     217           0 :                 sal_Int32 bottom = (sal_Int32) index[len];
     218           0 :                 sal_Int32 top = (sal_Int32) index[len+1] - 1;
     219             : 
     220           0 :                 while (bottom <= top && !found) {
     221           0 :                     current = (top + bottom) / 2;
     222           0 :                     const sal_Int32 result = word.compareTo(wordData + entry[current]);
     223           0 :                     if (result < 0)
     224           0 :                         top = current - 1;
     225           0 :                     else if (result > 0)
     226           0 :                         bottom = current + 1;
     227             :                     else {
     228           0 :                         if (toSChinese)   // Traditionary/Simplified conversion,
     229           0 :                             for (current = entry[current]-1; current > 0 && wordData[current-1]; current--) ;
     230             :                         else  // Simplified/Traditionary conversion, forwards search for next word
     231           0 :                             current = entry[current] + word.getLength() + 1;
     232           0 :                         sal_Int32 start=current;
     233           0 :                         if (offset.getLength() > 0) {
     234           0 :                             if (word.getLength() != OUString(&wordData[current]).getLength())
     235           0 :                                 one2one=false;
     236           0 :                             sal_Int32 convertedLength=OUString(&wordData[current]).getLength();
     237           0 :                             while (wordData[current]) {
     238           0 :                                 offset[count]=nStartPos + currPos + ((current-start) *
     239           0 :                                     word.getLength() / convertedLength);
     240           0 :                                 newStr[count++] = wordData[current++];
     241             :                             }
     242             :                             // offset[count-1]=nStartPos + currPos + word.getLength() - 1;
     243             :                         } else {
     244           0 :                             while (wordData[current])
     245           0 :                                 newStr[count++] = wordData[current++];
     246             :                         }
     247           0 :                         currPos += word.getLength();
     248           0 :                         found = true;
     249             :                     }
     250             :                 }
     251             :             }
     252           0 :         }
     253           0 :         if (!found) {
     254           0 :             if (offset.getLength() > 0)
     255           0 :                 offset[count]=nStartPos+currPos;
     256           0 :             newStr[count++] =
     257           0 :                 getOneCharConversion(aText[nStartPos+currPos], charData, charIndex);
     258           0 :             currPos++;
     259             :         }
     260             :     }
     261           0 :     if (offset.getLength() > 0)
     262           0 :         offset.realloc(one2one ? 0 : count);
     263           0 :     OUString aRet(newStr.get(), count);
     264           0 :     return aRet;
     265             : }
     266             : 
     267             : TextConversionResult SAL_CALL
     268           2 : TextConversion_zh::getConversions( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
     269             :     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
     270             :     throw(  RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
     271             : {
     272           2 :     TextConversionResult result;
     273             : 
     274           2 :     result.Candidates.realloc(1);
     275           2 :     result.Candidates[0] = getConversion( aText, nStartPos, nLength, rLocale, nConversionType, nConversionOptions);
     276           2 :     result.Boundary.startPos = nStartPos;
     277           2 :     result.Boundary.endPos = nStartPos + nLength;
     278             : 
     279           2 :     return result;
     280             : }
     281             : 
     282             : OUString SAL_CALL
     283           2 : TextConversion_zh::getConversion( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
     284             :     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions)
     285             :     throw(  RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
     286             : {
     287           2 :     if (rLocale.Language == "zh" && ( nConversionType == TextConversionType::TO_SCHINESE || nConversionType == TextConversionType::TO_TCHINESE) ) {
     288             : 
     289           2 :         aLocale=rLocale;
     290           2 :         bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
     291             : 
     292           2 :         if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER)
     293             :             // char to char dictionary
     294           2 :             return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
     295             :         else {
     296           0 :             Sequence <sal_Int32> offset;
     297             :             // word to word dictionary
     298           0 :             return  getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
     299             :         }
     300             :     } else
     301           0 :         throw NoSupportException(); // Conversion type is not supported in this service.
     302             : }
     303             : 
     304             : OUString SAL_CALL
     305           2 : TextConversion_zh::getConversionWithOffset( const OUString& aText, sal_Int32 nStartPos, sal_Int32 nLength,
     306             :     const Locale& rLocale, sal_Int16 nConversionType, sal_Int32 nConversionOptions, Sequence<sal_Int32>& offset)
     307             :     throw(  RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
     308             : {
     309           2 :     if (rLocale.Language == "zh" && ( nConversionType == TextConversionType::TO_SCHINESE || nConversionType == TextConversionType::TO_TCHINESE) ) {
     310             : 
     311           2 :         aLocale=rLocale;
     312           2 :         bool toSChinese = nConversionType == TextConversionType::TO_SCHINESE;
     313             : 
     314           2 :         if (nConversionOptions & TextConversionOption::CHARACTER_BY_CHARACTER) {
     315           2 :             offset.realloc(0);
     316             :             // char to char dictionary
     317           2 :             return getCharConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions);
     318             :         } else {
     319           0 :             if (offset.getLength() < 2*nLength)
     320           0 :                 offset.realloc(2*nLength);
     321             :             // word to word dictionary
     322           0 :             return  getWordConversion(aText, nStartPos, nLength, toSChinese, nConversionOptions, offset);
     323             :         }
     324             :     } else
     325           0 :         throw NoSupportException(); // Conversion type is not supported in this service.
     326             : }
     327             : 
     328             : sal_Bool SAL_CALL
     329           0 : TextConversion_zh::interactiveConversion( const Locale& /*rLocale*/, sal_Int16 /*nTextConversionType*/, sal_Int32 /*nTextConversionOptions*/ )
     330             :     throw(  RuntimeException, IllegalArgumentException, NoSupportException, std::exception )
     331             : {
     332           0 :     return sal_False;
     333             : }
     334             : 
     335             : } } } }
     336             : 
     337             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11