LCOV - code coverage report
Current view: top level - i18npool/source/transliteration - transliteration_body.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 59 141 41.8 %
Date: 2012-08-25 Functions: 6 19 31.6 %
Legend: Lines: hit not hit | Branches: + taken - not taken # not executed Branches: 20 76 26.3 %

           Branch data     Line data    Source code
       1                 :            : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2                 :            : /*
       3                 :            :  * This file is part of the LibreOffice project.
       4                 :            :  *
       5                 :            :  * This Source Code Form is subject to the terms of the Mozilla Public
       6                 :            :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7                 :            :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8                 :            :  *
       9                 :            :  * This file incorporates work covered by the following license notice:
      10                 :            :  *
      11                 :            :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12                 :            :  *   contributor license agreements. See the NOTICE file distributed
      13                 :            :  *   with this work for additional information regarding copyright
      14                 :            :  *   ownership. The ASF licenses this file to you under the Apache
      15                 :            :  *   License, Version 2.0 (the "License"); you may not use this file
      16                 :            :  *   except in compliance with the License. You may obtain a copy of
      17                 :            :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18                 :            :  */
      19                 :            : 
      20                 :            : 
      21                 :            : #include <rtl/ustrbuf.hxx>
      22                 :            : #include <i18nutil/casefolding.hxx>
      23                 :            : #include <i18nutil/unicode.hxx>
      24                 :            : 
      25                 :            : #include <comphelper/processfactory.hxx>
      26                 :            : #include <comphelper/string.hxx>
      27                 :            : #include <osl/diagnose.h>
      28                 :            : 
      29                 :            : #include <string.h>
      30                 :            : 
      31                 :            : #include "characterclassificationImpl.hxx"
      32                 :            : #include "breakiteratorImpl.hxx"
      33                 :            : 
      34                 :            : #define TRANSLITERATION_ALL
      35                 :            : #include "transliteration_body.hxx"
      36                 :            : 
      37                 :            : using namespace ::com::sun::star::uno;
      38                 :            : using namespace ::com::sun::star::lang;
      39                 :            : using namespace ::rtl;
      40                 :            : 
      41                 :            : namespace com { namespace sun { namespace star { namespace i18n {
      42                 :            : 
      43                 :            : 
      44                 :      35043 : Transliteration_body::Transliteration_body()
      45                 :            : {
      46                 :      35043 :     nMappingType = 0;
      47                 :      35043 :     transliterationName = "Transliteration_body";
      48                 :      35043 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_body";
      49                 :      35043 : }
      50                 :            : 
      51                 :          0 : sal_Int16 SAL_CALL Transliteration_body::getType() throw(RuntimeException)
      52                 :            : {
      53                 :          0 :     return TransliterationType::ONE_TO_ONE;
      54                 :            : }
      55                 :            : 
      56                 :          0 : sal_Bool SAL_CALL Transliteration_body::equals(
      57                 :            :     const OUString& /*str1*/, sal_Int32 /*pos1*/, sal_Int32 /*nCount1*/, sal_Int32& /*nMatch1*/,
      58                 :            :     const OUString& /*str2*/, sal_Int32 /*pos2*/, sal_Int32 /*nCount2*/, sal_Int32& /*nMatch2*/)
      59                 :            :     throw(RuntimeException)
      60                 :            : {
      61         [ #  # ]:          0 :     throw RuntimeException();
      62                 :            : }
      63                 :            : 
      64                 :            : Sequence< OUString > SAL_CALL
      65                 :          0 : Transliteration_body::transliterateRange( const OUString& str1, const OUString& str2 )
      66                 :            :     throw( RuntimeException)
      67                 :            : {
      68                 :          0 :     Sequence< OUString > ostr(2);
      69         [ #  # ]:          0 :     ostr[0] = str1;
      70         [ #  # ]:          0 :     ostr[1] = str2;
      71                 :          0 :     return ostr;
      72                 :            : }
      73                 :            : 
      74                 :            : 
      75                 :          0 : static sal_uInt8 lcl_getMappingTypeForToggleCase( sal_uInt8 nMappingType, sal_Unicode cChar )
      76                 :            : {
      77                 :          0 :     sal_uInt8 nRes = nMappingType;
      78                 :            : 
      79                 :            :     // take care of TOGGLE_CASE transliteration:
      80                 :            :     // nMappingType should not be a combination of flags, thus we decide now
      81                 :            :     // which one to use.
      82         [ #  # ]:          0 :     if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
      83                 :            :     {
      84                 :          0 :         const sal_Int16 nType = unicode::getUnicodeType( cChar );
      85         [ #  # ]:          0 :         if (nType & 0x02 /* lower case*/)
      86                 :          0 :             nRes = MappingTypeLowerToUpper;
      87                 :            :         else
      88                 :            :         {
      89                 :            :             // should also work properly for non-upper characters like white space, numbers, ...
      90                 :          0 :             nRes = MappingTypeUpperToLower;
      91                 :            :         }
      92                 :            :     }
      93                 :            : 
      94                 :          0 :     return nRes;
      95                 :            : }
      96                 :            : 
      97                 :            : 
      98                 :            : OUString SAL_CALL
      99                 :     893190 : Transliteration_body::transliterate(
     100                 :            :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     101                 :            :     Sequence< sal_Int32 >& offset)
     102                 :            :     throw(RuntimeException)
     103                 :            : {
     104                 :            : 
     105                 :     893190 :     const sal_Unicode *in = inStr.getStr() + startPos;
     106                 :            : 
     107                 :            :     // Two different blocks to eliminate the if(useOffset) condition inside the
     108                 :            :     // inner k loop. Yes, on massive use even such small things do count.
     109         [ +  + ]:     893190 :     if ( useOffset )
     110                 :            :     {
     111                 :        705 :         sal_Int32 nOffCount = 0, i;
     112         [ +  + ]:      28654 :         for (i = 0; i < nCount; i++)
     113                 :            :         {
     114                 :            :             // take care of TOGGLE_CASE transliteration:
     115                 :      27949 :             sal_uInt8 nTmpMappingType = nMappingType;
     116         [ -  + ]:      27949 :             if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
     117                 :          0 :                 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
     118                 :            : 
     119                 :      27949 :             const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
     120                 :      27949 :             nOffCount += map.nmap;
     121                 :            :         }
     122                 :        705 :         rtl_uString* pStr = comphelper::string::rtl_uString_alloc(nOffCount);
     123                 :        705 :         sal_Unicode* out = pStr->buffer;
     124                 :            : 
     125         [ -  + ]:        705 :         if ( nOffCount != offset.getLength() )
     126                 :          0 :             offset.realloc( nOffCount );
     127                 :            : 
     128                 :        705 :         sal_Int32 j = 0;
     129                 :        705 :         sal_Int32 * pArr = offset.getArray();
     130         [ +  + ]:      28654 :         for (i = 0; i < nCount; i++)
     131                 :            :         {
     132                 :            :             // take care of TOGGLE_CASE transliteration:
     133                 :      27949 :             sal_uInt8 nTmpMappingType = nMappingType;
     134         [ -  + ]:      27949 :             if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
     135                 :          0 :                 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
     136                 :            : 
     137                 :      27949 :             const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
     138         [ +  + ]:      55898 :             for (sal_Int32 k = 0; k < map.nmap; k++)
     139                 :            :             {
     140                 :      27949 :                 pArr[j] = i + startPos;
     141                 :      27949 :                 out[j++] = map.map[k];
     142                 :            :             }
     143                 :            :         }
     144                 :        705 :         out[j] = 0;
     145                 :            : 
     146                 :        705 :         return OUString( pStr, SAL_NO_ACQUIRE );
     147                 :            :     }
     148                 :            :     else
     149                 :            :     {
     150                 :            :         // In the simple case of no offset sequence used we can eliminate the
     151                 :            :         // first getValue() loop. We could also assume that most calls result
     152                 :            :         // in identical string lengths, thus using a preallocated
     153                 :            :         // OUStringBuffer could be an easy way to assemble the return string
     154                 :            :         // without too much hassle. However, for single characters the
     155                 :            :         // OUStringBuffer::append() method is quite expensive compared to a
     156                 :            :         // simple array operation, so it pays here to copy the final result
     157                 :            :         // instead.
     158                 :            : 
     159                 :            :         // Allocate the max possible buffer. Try to use stack instead of heap,
     160                 :            :         // which would have to be reallocated most times anyways.
     161                 :     892485 :         const sal_Int32 nLocalBuf = 2048;
     162                 :     892485 :         sal_Unicode aLocalBuf[ nLocalBuf * NMAPPINGMAX ], *out = aLocalBuf, *pHeapBuf = NULL;
     163         [ -  + ]:     892485 :         if ( nCount > nLocalBuf )
     164         [ #  # ]:          0 :             out = pHeapBuf = new sal_Unicode[ nCount * NMAPPINGMAX ];
     165                 :            : 
     166                 :     892485 :         sal_Int32 j = 0;
     167         [ +  + ]:    8340169 :         for ( sal_Int32 i = 0; i < nCount; i++)
     168                 :            :         {
     169                 :            :             // take care of TOGGLE_CASE transliteration:
     170                 :    7447684 :             sal_uInt8 nTmpMappingType = nMappingType;
     171         [ -  + ]:    7447684 :             if (nMappingType == (MappingTypeLowerToUpper | MappingTypeUpperToLower))
     172         [ #  # ]:          0 :                 nTmpMappingType = lcl_getMappingTypeForToggleCase( nMappingType, in[i] );
     173                 :            : 
     174         [ +  - ]:    7447684 :             const Mapping &map = casefolding::getValue( in, i, nCount, aLocale, nTmpMappingType );
     175         [ +  + ]:   14895368 :             for (sal_Int32 k = 0; k < map.nmap; k++)
     176                 :            :             {
     177                 :    7447684 :                 out[j++] = map.map[k];
     178                 :            :             }
     179                 :            :         }
     180                 :            : 
     181                 :     892485 :         OUString aRet( out, j );
     182         [ -  + ]:     892485 :         if ( pHeapBuf )
     183         [ #  # ]:          0 :             delete [] pHeapBuf;
     184                 :     893190 :         return aRet;
     185                 :            :     }
     186                 :            : }
     187                 :            : 
     188                 :            : OUString SAL_CALL
     189                 :          0 : Transliteration_body::transliterateChar2String( sal_Unicode inChar ) throw(RuntimeException)
     190                 :            : {
     191                 :          0 :         const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
     192                 :          0 :         rtl_uString* pStr = comphelper::string::rtl_uString_alloc(map.nmap);
     193                 :          0 :         sal_Unicode* out = pStr->buffer;
     194                 :            :         sal_Int32 i;
     195                 :            : 
     196         [ #  # ]:          0 :         for (i = 0; i < map.nmap; i++)
     197                 :          0 :             out[i] = map.map[i];
     198                 :          0 :         out[i] = 0;
     199                 :            : 
     200                 :          0 :         return OUString( pStr, SAL_NO_ACQUIRE );
     201                 :            : }
     202                 :            : 
     203                 :            : sal_Unicode SAL_CALL
     204                 :          9 : Transliteration_body::transliterateChar2Char( sal_Unicode inChar ) throw(MultipleCharsOutputException, RuntimeException)
     205                 :            : {
     206                 :          9 :         const Mapping &map = casefolding::getValue(&inChar, 0, 1, aLocale, nMappingType);
     207         [ -  + ]:          9 :         if (map.nmap > 1)
     208         [ #  # ]:          0 :             throw MultipleCharsOutputException();
     209                 :          9 :         return map.map[0];
     210                 :            : }
     211                 :            : 
     212                 :            : OUString SAL_CALL
     213                 :          0 : Transliteration_body::folding( const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     214                 :            :     Sequence< sal_Int32 >& offset) throw(RuntimeException)
     215                 :            : {
     216                 :          0 :     return this->transliterate(inStr, startPos, nCount, offset);
     217                 :            : }
     218                 :            : 
     219                 :      34872 : Transliteration_casemapping::Transliteration_casemapping()
     220                 :            : {
     221                 :      34872 :     nMappingType = 0;
     222                 :      34872 :     transliterationName = "casemapping(generic)";
     223                 :      34872 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_casemapping";
     224                 :      34872 : }
     225                 :            : 
     226                 :            : void SAL_CALL
     227                 :     892430 : Transliteration_casemapping::setMappingType( const sal_uInt8 rMappingType, const Locale& rLocale )
     228                 :            : {
     229                 :     892430 :     nMappingType = rMappingType;
     230                 :     892430 :     aLocale = rLocale;
     231                 :     892430 : }
     232                 :            : 
     233                 :          0 : Transliteration_u2l::Transliteration_u2l()
     234                 :            : {
     235                 :          0 :     nMappingType = MappingTypeUpperToLower;
     236                 :          0 :     transliterationName = "upper_to_lower(generic)";
     237                 :          0 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_u2l";
     238                 :          0 : }
     239                 :            : 
     240                 :          6 : Transliteration_l2u::Transliteration_l2u()
     241                 :            : {
     242                 :          6 :     nMappingType = MappingTypeLowerToUpper;
     243                 :          6 :     transliterationName = "lower_to_upper(generic)";
     244                 :          6 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_l2u";
     245                 :          6 : }
     246                 :            : 
     247                 :          0 : Transliteration_togglecase::Transliteration_togglecase()
     248                 :            : {
     249                 :            :     // usually nMappingType must NOT be a combination of different flags here,
     250                 :            :     // but we take care of that problem in Transliteration_body::transliterate above
     251                 :            :     // before that value is used. There we will decide which of both is to be used on
     252                 :            :     // a per character basis.
     253                 :          0 :     nMappingType = MappingTypeLowerToUpper | MappingTypeUpperToLower;
     254                 :          0 :     transliterationName = "toggle(generic)";
     255                 :          0 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_togglecase";
     256                 :          0 : }
     257                 :            : 
     258                 :          0 : Transliteration_titlecase::Transliteration_titlecase()
     259                 :            : {
     260                 :          0 :     nMappingType = MappingTypeToTitle;
     261                 :          0 :     transliterationName = "title(generic)";
     262                 :          0 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_titlecase";
     263                 :          0 : }
     264                 :            : 
     265                 :          0 : static rtl::OUString transliterate_titlecase_Impl(
     266                 :            :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     267                 :            :     const Locale &rLocale,
     268                 :            :     Sequence< sal_Int32 >& offset )
     269                 :            :     throw(RuntimeException)
     270                 :            : {
     271                 :          0 :     const OUString aText( inStr.copy( startPos, nCount ) );
     272                 :            : 
     273                 :          0 :     OUString aRes;
     274         [ #  # ]:          0 :     if (!aText.isEmpty())
     275                 :            :     {
     276         [ #  # ]:          0 :         Reference< XMultiServiceFactory > xMSF = ::comphelper::getProcessServiceFactory();
     277         [ #  # ]:          0 :         CharacterClassificationImpl aCharClassImpl( xMSF );
     278                 :            : 
     279                 :            :         // because aCharClassImpl.toTitle does not handle ligatures or ß but will raise
     280                 :            :         // an exception we need to handle the first character manually...
     281                 :            : 
     282                 :            :         // we don't want to change surrogates by accident, thus we use proper code point iteration
     283                 :          0 :         sal_Int32 nPos = 0;
     284         [ #  # ]:          0 :         sal_uInt32 cFirstChar = aText.iterateCodePoints( &nPos );
     285         [ #  # ]:          0 :         OUString aResolvedLigature( &cFirstChar, 1 ); //lcl_ResolveLigature( cFirstChar ) );
     286                 :            :         // toUpper can be used to properly resolve ligatures and characters like ß
     287         [ #  # ]:          0 :         aResolvedLigature = aCharClassImpl.toUpper( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
     288                 :            :         // since toTitle will leave all-uppercase text unchanged we first need to
     289                 :            :         // use toLower to bring possible 2nd and following characters to lowercase
     290         [ #  # ]:          0 :         aResolvedLigature = aCharClassImpl.toLower( aResolvedLigature, 0, aResolvedLigature.getLength(), rLocale );
     291                 :          0 :         sal_Int32 nResolvedLen = aResolvedLigature.getLength();
     292                 :            : 
     293                 :            :         // now we can properly use toTitle to get the expected result for the resolved string.
     294                 :            :         // The rest of the text should just become lowercase.
     295         [ #  # ]:          0 :         aRes = aCharClassImpl.toTitle( aResolvedLigature, 0, nResolvedLen, rLocale );
     296         [ #  # ]:          0 :         aRes += aCharClassImpl.toLower( aText, 1, aText.getLength() - 1, rLocale );
     297         [ #  # ]:          0 :         offset.realloc( aRes.getLength() );
     298                 :            : 
     299         [ #  # ]:          0 :         sal_Int32 *pOffset = offset.getArray();
     300                 :          0 :         sal_Int32 nLen = offset.getLength();
     301         [ #  # ]:          0 :         for (sal_Int32 i = 0; i < nLen; ++i)
     302                 :            :         {
     303                 :          0 :             sal_Int32 nIdx = 0;
     304         [ #  # ]:          0 :             if (i >= nResolvedLen)
     305                 :          0 :                 nIdx = i - nResolvedLen + 1;
     306                 :          0 :             pOffset[i] = nIdx;
     307         [ #  # ]:          0 :         }
     308                 :            :     }
     309                 :            : #if OSL_DEBUG_LEVEL > 1
     310                 :            :     const sal_Int32 *pCOffset = offset.getConstArray();
     311                 :            :     (void) pCOffset;
     312                 :            : #endif
     313                 :            : 
     314                 :          0 :     return aRes;
     315                 :            : }
     316                 :            : 
     317                 :            : 
     318                 :            : // this function expects to be called on a word-by-word basis,
     319                 :            : // namely that startPos points to the first char of the word
     320                 :          0 : rtl::OUString SAL_CALL Transliteration_titlecase::transliterate(
     321                 :            :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     322                 :            :     Sequence< sal_Int32 >& offset )
     323                 :            :     throw(RuntimeException)
     324                 :            : {
     325                 :          0 :     return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
     326                 :            : }
     327                 :            : 
     328                 :            : 
     329                 :          0 : Transliteration_sentencecase::Transliteration_sentencecase()
     330                 :            : {
     331                 :          0 :     nMappingType = MappingTypeToTitle;  // though only to be applied to the first word...
     332                 :          0 :     transliterationName = "sentence(generic)";
     333                 :          0 :     implementationName = "com.sun.star.i18n.Transliteration.Transliteration_sentencecase";
     334                 :          0 : }
     335                 :            : 
     336                 :            : 
     337                 :            : // this function expects to be called on a sentence-by-sentence basis,
     338                 :            : // namely that startPos points to the first word (NOT first char!) in the sentence
     339                 :          0 : rtl::OUString SAL_CALL Transliteration_sentencecase::transliterate(
     340                 :            :     const OUString& inStr, sal_Int32 startPos, sal_Int32 nCount,
     341                 :            :     Sequence< sal_Int32 >& offset )
     342                 :            :     throw(RuntimeException)
     343                 :            : {
     344                 :          0 :     return transliterate_titlecase_Impl( inStr, startPos, nCount, aLocale, offset );
     345                 :            : }
     346                 :            : 
     347                 :            : 
     348                 :            : } } } }
     349                 :            : 
     350                 :            : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10