LCOV - code coverage report
Current view: top level - i18npool/source/transliteration - transliteration_body.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 119 141 84.4 %
Date: 2014-11-03 Functions: 14 19 73.7 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include <rtl/ustrbuf.hxx>
      21             : #include <i18nutil/casefolding.hxx>
      22             : #include <i18nutil/unicode.hxx>
      23             : 
      24             : #include <comphelper/processfactory.hxx>
      25             : #include <comphelper/string.hxx>
      26             : #include <osl/diagnose.h>
      27             : 
      28             : #include <string.h>
      29             : 
      30             : #include "characterclassificationImpl.hxx"
      31             : #include "breakiteratorImpl.hxx"
      32             : 
      33             : #include "transliteration_body.hxx"
      34             : #include <boost/scoped_array.hpp>
      35             : 
      36             : using namespace ::com::sun::star::uno;
      37             : using namespace ::com::sun::star::lang;
      38             : 
      39             : namespace com { namespace sun { namespace star { namespace i18n {
      40             : 
      41       60701 : Transliteration_body::Transliteration_body()
      42             : {
      43       60701 :     nMappingType = 0;
      44       60701 :     transliterationName = "Transliteration_body";
      45       60701 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
      46       60701 : }
      47             : 
      48           0 : sal_Int16 SAL_CALL Transliteration_body::getType() throw(RuntimeException, std::exception)
      49             : {
      50           0 :     return TransliterationType::ONE_TO_ONE;
      51             : }
      52             : 
      53           0 : sal_Bool SAL_CALL Transliteration_body::equals(
      54             :     const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
      55             :     const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
      56             :     throw(RuntimeException, std::exception)
      57             : {
      58           0 :     throw RuntimeException();
      59             : }
      60             : 
      61             : Sequence< OUString > SAL_CALL
      62           0 : Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
      63             :     throw( RuntimeException, std::exception)
      64             : {
      65           0 :     Sequence< OUString > ostr(2);
      66           0 :     ostr[0] = str1;
      67           0 :     ostr[1] = str2;
      68           0 :     return ostr;
      69             : }
      70             : 
      71          24 : static sal_uInt8 lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType, sal_Unicode cChar )
      72             : {
      73          24 :     sal_uInt8 nRes = nMappingType;
      74             : 
      75             :     // take care of TOGGLE_CASE transliteration:
      76             :     // nMappingType should not be a combination of flags, thuse we decide now
      77             :     // which one to use.
      78          24 :     if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
      79             :     {
      80          24 :         const sal_Int16 nType = unicode::getUnicodeType( cChar );
      81          24 :         if (nType & 0x02 /* lower case*/)
      82          20 :             nRes = MappingTypeLowerToUpper;
      83             :         else
      84             :         {
      85             :             // should also work properly for non-upper characters like white spacs, numbers, ...
      86           4 :             nRes = MappingTypeUpperToLower;
      87             :         }
      88             :     }
      89             : 
      90          24 :     return nRes;
      91             : }
      92             : 
      93             : OUString SAL_CALL
      94     3078640 : Transliteration_body::transliterate(
      95             :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
      96             :     Sequence< sal_Int32 >& offset)
      97             :     throw(RuntimeException, std::exception)
      98             : {
      99     3078640 :     const sal_Unicode *in = inStr.getStr() + startPos;
     100             : 
     101             :     // Two different blocks to eliminate the if(useOffset) condition inside the
     102             :     // inner k loop. Yes, on massive use even such small things do count.
     103     3078640 :     if ( useOffset )
     104             :     {
     105         950 :         sal_Int32 nOffCount = 0, i;
     106       29858 :         for (i = 0; i < nCount; i++)
     107             :         {
     108             :             // take care of TOGGLE_CASE transliteration:
     109       28908 :             sal_uInt8 nTmpMappingType = nMappingType;
     110       28908 :             if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
     111          12 :                 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
     112             : 
     113       28908 :             const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
     114       28908 :             nOffCount += map.nmap;
     115             :         }
     116         950 :         rtl_uString* pStr = rtl_uString_alloc(nOffCount);
     117         950 :         sal_Unicode* out = pStr->buffer;
     118             : 
     119         950 :         if ( nOffCount != offset.getLength() )
     120           0 :             offset.realloc( nOffCount );
     121             : 
     122         950 :         sal_Int32 j = 0;
     123         950 :         sal_Int32 * pArr = offset.getArray();
     124       29858 :         for (i = 0; i < nCount; i++)
     125             :         {
     126             :             // take care of TOGGLE_CASE transliteration:
     127       28908 :             sal_uInt8 nTmpMappingType = nMappingType;
     128       28908 :             if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
     129          12 :                 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
     130             : 
     131       28908 :             const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
     132       57816 :             for (sal_Int32 k = 0; k < map.nmap; k++)
     133             :             {
     134       28908 :                 pArr[j] = i + startPos;
     135       28908 :                 out[j++] = map.map[k];
     136             :             }
     137             :         }
     138         950 :         out[j] = 0;
     139             : 
     140         950 :         return OUString( pStr, SAL_NO_ACQUIRE );
     141             :     }
     142             :     else
     143             :     {
     144             :         // In the simple case of no offset sequence used we can eliminate the
     145             :         // first getValue() loop. We could also assume that most calls result
     146             :         // in identical string lengths, thus using a preallocated
     147             :         // OUStringBuffer could be an easy way to assemble the return string
     148             :         // without too much hassle. However, for single characters the
     149             :         // OUStringBuffer::append() method is quite expensive compared to a
     150             :         // simple array operation, so it pays here to copy the final result
     151             :         // instead.
     152             : 
     153             :         // Allocate the max possible buffer. Try to use stack instead of heap,
     154             :         // which would have to be reallocated most times anyways.
     155     3077690 :         const sal_Int32 nLocalBuf = 2048;
     156     3077690 :         sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf;
     157     3077690 :         boost::scoped_array<sal_Unicode> pHeapBuf;
     158     3077690 :         if ( nCount > nLocalBuf ) {
     159           2 :             pHeapBuf.reset(new sal_Unicode[ nCount * NMAPPINGMAX ]);
     160           2 :             out = pHeapBuf.get();
     161             :         }
     162             : 
     163     3077690 :         sal_Int32 j = 0;
     164    30266858 :         for ( sal_Int32 i = 0; i < nCount; i++)
     165             :         {
     166             :             // take care of TOGGLE_CASE transliteration:
     167    27189168 :             sal_uInt8 nTmpMappingType = nMappingType;
     168    27189168 :             if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
     169           0 :                 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
     170             : 
     171    27189168 :             const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
     172    54378344 :             for (sal_Int32 k = 0; k < map.nmap; k++)
     173             :             {
     174    27189176 :                 out[j++] = map.map[k];
     175             :             }
     176             :         }
     177             : 
     178     6155380 :         OUString aRet( out, j );
     179     6155380 :         return aRet;
     180             :     }
     181             : }
     182             : 
     183             : OUString SAL_CALL
     184           0 : Transliteration_body::transliterateChar2String( sal_Unicode inChar ) throw(RuntimeException, std::exception)
     185             : {
     186           0 :     const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
     187           0 :     rtl_uString* pStr = rtl_uString_alloc(map.nmap);
     188           0 :     sal_Unicode* out = pStr->buffer;
     189             :     sal_Int32 i;
     190             : 
     191           0 :     for (i = 0; i < map.nmap; i++)
     192           0 :         out[i] = map.map[i];
     193           0 :     out[i] = 0;
     194             : 
     195           0 :     return OUString( pStr, SAL_NO_ACQUIRE );
     196             : }
     197             : 
     198             : sal_Unicode SAL_CALL
     199          16 : Transliteration_body::transliterateChar2Char( sal_Unicode inChar ) throw(MultipleCharsOutputException, RuntimeException, std::exception)
     200             : {
     201          16 :     const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
     202          16 :     if (map.nmap > 1)
     203           0 :         throw MultipleCharsOutputException();
     204          16 :     return map.map[0];
     205             : }
     206             : 
     207             : OUString SAL_CALL
     208           0 : Transliteration_body::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     209             :     Sequence< sal_Int32 >& offset) throw(RuntimeException, std::exception)
     210             : {
     211           0 :     return this->transliterate(inStr, startPos, nCount, offset);
     212             : }
     213             : 
     214       60441 : Transliteration_casemapping::Transliteration_casemapping()
     215             : {
     216       60441 :     nMappingType = 0;
     217       60441 :     transliterationName = "casemapping(generic)";
     218       60441 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
     219       60441 : }
     220             : 
     221             : void SAL_CALL
     222     3077626 : Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType, const Locale& rLocale )
     223             : {
     224     3077626 :     nMappingType = rMappingType;
     225     3077626 :     aLocale = rLocale;
     226     3077626 : }
     227             : 
     228           4 : Transliteration_u2l::Transliteration_u2l()
     229             : {
     230           4 :     nMappingType = MappingTypeUpperToLower;
     231           4 :     transliterationName = "upper_to_lower(generic)";
     232           4 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
     233           4 : }
     234             : 
     235          10 : Transliteration_l2u::Transliteration_l2u()
     236             : {
     237          10 :     nMappingType = MappingTypeLowerToUpper;
     238          10 :     transliterationName = "lower_to_upper(generic)";
     239          10 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
     240          10 : }
     241             : 
     242           2 : Transliteration_togglecase::Transliteration_togglecase()
     243             : {
     244             :     // usually nMappingType must NOT be a combiantion of different flages here,
     245             :     // but we take care of that problem in Transliteration_body::transliterate above
     246             :     // before that value is used. There we will decide which of both is to be used on
     247             :     // a per character basis.
     248           2 :     nMappingType = MappingTypeLowerToUpper | MappingTypeUpperToLower;
     249           2 :     transliterationName = "toggle(generic)";
     250           2 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
     251           2 : }
     252             : 
     253           2 : Transliteration_titlecase::Transliteration_titlecase()
     254             : {
     255           2 :     nMappingType = MappingTypeToTitle;
     256           2 :     transliterationName = "title(generic)";
     257           2 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
     258           2 : }
     259             : 
     260           4 : static OUString transliterate_titlecase_Impl(
     261             :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     262             :     const Locale &rLocale,
     263             :     Sequence< sal_Int32 >& offset )
     264             :     throw(RuntimeException)
     265             : {
     266           4 :     const OUString aText( inStr.copy( startPos, nCount ) );
     267             : 
     268           4 :     OUString aRes;
     269           4 :     if (!aText.isEmpty())
     270             :     {
     271           4 :         Reference< XComponentContext > xContext = ::comphelper::getProcessComponentContext();
     272           8 :         CharacterClassificationImpl aCharClassImpl( xContext );
     273             : 
     274             :         // because aCharClassImpl.toTitle does not handle ligatures or Beta but will raise
     275             :         // an exception we need to handle the first chara manually...
     276             : 
     277             :         // we don't want to change surrogates by accident, thuse we use proper code point iteration
     278           4 :         sal_Int32 nPos = 0;
     279           4 :         sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos );
     280           8 :         OUString aResolvedLigature( &cFirstChar, 1 );
     281             :         // toUpper can be used to properly resolve ligatures and characters like Beta
     282           4 :         aResolvedLigature = aCharClassImpl.toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
     283             :         // since toTitle will leave all-uppercase text unchanged we first need to
     284             :         // use toLower to bring possible 2nd and following charas in lowercase
     285           4 :         aResolvedLigature = aCharClassImpl.toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
     286           4 :         sal_Int32 nResolvedLen = aResolvedLigature.getLength();
     287             : 
     288             :         // now we can properly use toTitle to get the expected result for the resolved string.
     289             :         // The rest of the text should just become lowercase.
     290           4 :         aRes = aCharClassImpl.toTitle( aResolvedLigature, 0, nResolvedLen, rLocale );
     291           4 :         aRes += aCharClassImpl.toLower( aText, 1, aText.getLength() - 1, rLocale );
     292           4 :         offset.realloc( aRes.getLength() );
     293             : 
     294           4 :         sal_Int32 *pOffset = offset.getArray();
     295           4 :         sal_Int32 nLen = offset.getLength();
     296          28 :         for (sal_Int32 i = 0; i < nLen; ++i)
     297             :         {
     298          24 :             sal_Int32 nIdx = 0;
     299          24 :             if (i >= nResolvedLen)
     300          20 :                 nIdx = i - nResolvedLen + 1;
     301          24 :             pOffset[i] = nIdx;
     302           4 :         }
     303             :     }
     304             : #if OSL_DEBUG_LEVEL > 1
     305             :     const sal_Int32 *pCOffset = offset.getConstArray();
     306             :     (void) pCOffset;
     307             : #endif
     308             : 
     309           4 :     return aRes;
     310             : }
     311             : 
     312             : // this function expects to be called on a word-by-word basis,
     313             : // namely that startPos points to the first char of the word
     314           2 : OUString SAL_CALL Transliteration_titlecase::transliterate(
     315             :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     316             :     Sequence< sal_Int32 >& offset )
     317             :     throw(RuntimeException, std::exception)
     318             : {
     319           2 :     return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
     320             : }
     321             : 
     322           2 : Transliteration_sentencecase::Transliteration_sentencecase()
     323             : {
     324           2 :     nMappingType = MappingTypeToTitle;  // though only to be applied to the first word...
     325           2 :     transliterationName = "sentence(generic)";
     326           2 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
     327           2 : }
     328             : 
     329             : // this function expects to be called on a sentence-by-sentence basis,
     330             : // namely that startPos points to the first word (NOT first char!) in the sentence
     331           2 : OUString SAL_CALL Transliteration_sentencecase::transliterate(
     332             :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     333             :     Sequence< sal_Int32 >& offset )
     334             :     throw(RuntimeException, std::exception)
     335             : {
     336           2 :     return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
     337             : }
     338             : 
     339             : } } } }
     340             : 
     341             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10