LCOV - code coverage report
Current view: top level - sal/textenc - tcvtmb.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 184 283 65.0 %
Date: 2014-11-03 Functions: 4 4 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "sal/config.h"
      21             : 
      22             : #include "rtl/textcvt.h"
      23             : 
      24             : #include "handleundefinedunicodetotextchar.hxx"
      25             : #include "tenchelp.hxx"
      26             : #include "unichars.hxx"
      27             : 
      28             : /* ======================================================================= */
      29             : 
      30             : /* The DBCS to Unicode conversion routines use a lead table indexed by the */
      31             : /* first byte; each entry yields either a trail table or, for single-byte */
      32             : /* characters, the Unicode value. Every lead byte has its own trail table, */
      33             : /* so that tables can be shared between different charset encodings. */
      34             : 
      35             : /* ======================================================================= */
      36             : /* ImplDBCSToUnicode: converts a double-byte (DBCS) encoded byte buffer to
                     :  * sal_Unicode values, using the tables reachable from pData.
                     :  *
                     :  * pData        - cast to const ImplDBCSConvertData*; supplies the lead
                     :  *                table, the lead/trail byte limits and the EUDC ranges.
                     :  * (2nd param)  - unused.
                     :  * pSrcBuf      - source bytes; nSrcBytes is their count.
                     :  * pDestBuf     - receives the converted characters; nDestChars is its
                     :  *                capacity in sal_Unicode units.
                     :  * nFlags       - RTL_TEXTTOUNICODE_FLAGS_* bits selecting error, ignore or
                     :  *                replacement handling for undefined/invalid input.
                     :  * pInfo        - reset to 0, then OR-ed with RTL_TEXTTOUNICODE_INFO_* bits.
                     :  * pSrcCvtBytes - receives how far pSrcBuf advanced, in bytes.
                     :  *
                     :  * Returns the number of sal_Unicode values written to pDestBuf.
                     :  */
      37        2038 : sal_Size ImplDBCSToUnicode( const void* pData, SAL_UNUSED_PARAMETER void*,
      38             :                             const char* pSrcBuf, sal_Size nSrcBytes,
      39             :                             sal_Unicode* pDestBuf, sal_Size nDestChars,
      40             :                             sal_uInt32 nFlags, sal_uInt32* pInfo,
      41             :                             sal_Size* pSrcCvtBytes )
      42             : {
      43             :     unsigned char                   cLead;
      44             :     unsigned char                   cTrail;
      45             :     sal_Unicode                 cConv;
      46             :     const ImplDBCSToUniLeadTab* pLeadEntry;
      47        2038 :     const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
      48        2038 :     const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
      49             :     sal_Unicode*                pEndDestBuf;
      50             :     const char*             pEndSrcBuf;
      51             : 
      52        2038 :     *pInfo = 0;
      53        2038 :     pEndDestBuf = pDestBuf+nDestChars;
      54        2038 :     pEndSrcBuf  = pSrcBuf+nSrcBytes;
      55       12902 :     while ( pSrcBuf < pEndSrcBuf )
      56             :     {
      57        9528 :         cLead = (unsigned char)*pSrcBuf;
      58             : 
      59             :         /* get entry for the lead byte */
      60        9528 :         pLeadEntry = pLeadTab+cLead;
      61             : 
      62             :         /* SingleByte char? (no trail table, or lead byte outside the configured lead range) */
      63        9528 :         if (pLeadEntry->mpToUniTrailTab == NULL
      64        2670 :             || cLead < pConvertData->mnLeadStart
      65        2670 :             || cLead > pConvertData->mnLeadEnd)
      66             :         {
      67        6858 :             cConv = pLeadEntry->mnUniChar;
      68       13716 :             if ( !cConv && (cLead != 0) ) /* a 0 entry for a non-NUL byte means "no mapping" */
      69             :             {
      70           0 :                 *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
      71           0 :                 if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
      72             :                 {
      73           0 :                     *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
      74           0 :                     break;
      75             :                 }
      76           0 :                 else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
      77             :                 {
      78           0 :                     pSrcBuf++;
      79           0 :                     continue;
      80             :                 }
      81             :                 else
      82           0 :                     cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
      83             :             }
      84             :         }
      85             :         else
      86             :         {
      87             :             /* Source buffer too small: the lead byte is the last byte of the buffer, so stop without consuming it */
      88        2670 :             if ( pSrcBuf +1 == pEndSrcBuf )
      89             :             {
      90         702 :                 *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
      91         702 :                 break;
      92             :             }
      93             : 
      94        1968 :             pSrcBuf++;
      95        1968 :             cTrail = (unsigned char)*pSrcBuf;
      96        1968 :             if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
      97        1752 :                 cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
      98             :             else
      99         216 :                 cConv = 0;
     100             : 
     101        1968 :             if ( !cConv )
     102             :             {
     103             :                 /* EUDC Ranges: try each of the mnEUDCCount entries of mpEUDCTab in turn */
     104             :                 sal_uInt16              i;
     105         216 :                 const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
     106         544 :                 for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
     107             :                 {
     108         992 :                     if ( (cLead >= pEUDCTab->mnLeadStart) &&
     109         448 :                          (cLead <= pEUDCTab->mnLeadEnd) )
     110             :                     {
     111         432 :                         if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
     112         216 :                              (cTrail <= pEUDCTab->mnTrail1End) )
     113             :                         {
     114             :                             cConv = pEUDCTab->mnUniStart+
     115         156 :                                     ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
     116         312 :                                     (cTrail-pEUDCTab->mnTrail1Start);
     117         156 :                             break;
     118             :                         }
     119             :                         else
     120             :                         {
     121          60 :                             sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
     122         120 :                             if ( (pEUDCTab->mnTrailCount >= 2) &&
     123         120 :                                  (cTrail >= pEUDCTab->mnTrail2Start) &&
     124          60 :                                  (cTrail <= pEUDCTab->mnTrail2End) )
     125             :                             {
     126             :                                 cConv = pEUDCTab->mnUniStart+
     127          60 :                                         ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
     128             :                                         nTrailCount+
     129         120 :                                         (cTrail-pEUDCTab->mnTrail2Start);
     130          60 :                                 break;
     131             :                             }
     132             :                             else
     133             :                             {
     134           0 :                                 nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1; /* NOTE(review): this replaces the size of
                     :                                    trail range 1 instead of adding to it, so the range-3 offset below skips only
                     :                                    range 2, while ImplUnicodeToDBCS subtracts the sizes of both range 1 and
                     :                                    range 2 before using mnTrail3Start - looks inconsistent, confirm */
     135           0 :                                 if ( (pEUDCTab->mnTrailCount >= 3) &&
     136           0 :                                      (cTrail >= pEUDCTab->mnTrail3Start) &&
     137           0 :                                      (cTrail <= pEUDCTab->mnTrail3End) )
     138             :                                 {
     139             :                                     cConv = pEUDCTab->mnUniStart+
     140           0 :                                             ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
     141             :                                             nTrailCount+
     142           0 :                                             (cTrail-pEUDCTab->mnTrail3Start);
     143           0 :                                     break;
     144             :                                 }
     145             :                             }
     146             :                         }
     147             :                     }
     148             : 
     149         328 :                     pEUDCTab++;
     150             :                 }
     151             : 
     152         216 :                 if ( !cConv )
     153             :                 {
     154             :                     /* We compare the full range of the trail we defined, */
     155             :                     /* which can often be greater than the limit. We do this */
     156             :                     /* so that extensions that don't consider encodings */
     157             :                     /* correctly treat double-byte characters as a single */
     158             :                     /* character as much as possible. */
     159             : /* Outside the configured lead/trail limits the pair is reported as invalid,
                     :    otherwise as an undefined multi-byte character. The two lead-byte tests
                     :    cannot be true here, because this branch is only entered when cLead
                     :    lies within mnLeadStart..mnLeadEnd.
                     :    NOTE(review): pSrcBuf already points at the trail byte, so the error
                     :    breaks below leave the lead byte counted in *pSrcCvtBytes - confirm
                     :    that this is intended. */
     160           0 :                     if (cLead < pConvertData->mnLeadStart
     161           0 :                         || cLead > pConvertData->mnLeadEnd
     162           0 :                         || cTrail < pConvertData->mnTrailStart
     163           0 :                         || cTrail > pConvertData->mnTrailEnd)
     164             :                     {
     165           0 :                         *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
     166           0 :                         if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
     167             :                         {
     168           0 :                             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
     169           0 :                             break;
     170             :                         }
     171           0 :                         else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
     172             :                         {
     173           0 :                             pSrcBuf++;
     174           0 :                             continue;
     175             :                         }
     176             :                         else
     177           0 :                             cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
     178             :                     }
     179             :                     else
     180             :                     {
     181           0 :                         *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
     182           0 :                         if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
     183             :                         {
     184           0 :                             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
     185           0 :                             break;
     186             :                         }
     187           0 :                         else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
     188             :                         {
     189           0 :                             pSrcBuf++;
     190           0 :                             continue;
     191             :                         }
     192             :                         else
     193           0 :                             cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
     194             :                     }
     195             :                 }
     196             :             }
     197             :         }
     198             : /* Output is checked only after the character was decoded. For a double-byte
                     :    character pSrcBuf has already moved to the trail byte, so stopping here
                     :    also reports the lead byte as consumed although nothing was written for
                     :    it. NOTE(review): confirm that callers cope with this. */
     199        8826 :         if ( pDestBuf == pEndDestBuf )
     200             :         {
     201           0 :             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
     202           0 :             break;
     203             :         }
     204             : 
     205        8826 :         *pDestBuf = cConv;
     206        8826 :         pDestBuf++;
     207        8826 :         pSrcBuf++;
     208             :     }
     209             : /* Both results are the distance the respective pointer advanced. */
     210        2038 :     *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
     211        2038 :     return (nDestChars - (pEndDestBuf-pDestBuf));
     212             : }
     213             : 
     214             : /* ----------------------------------------------------------------------- */
     215             : /* ImplUnicodeToDBCS: converts sal_Unicode values to a double-byte (DBCS)
                     :  * encoded byte buffer, using the tables reachable from pData.
                     :  *
                     :  * pData        - cast to const ImplDBCSConvertData*; supplies the high-byte
                     :  *                table, the lead/trail byte limits and the EUDC ranges.
                     :  * (2nd param)  - unused.
                     :  * pSrcBuf      - source characters; nSrcChars is their count.
                     :  * pDestBuf     - receives the converted bytes; nDestBytes is its capacity.
                     :  * nFlags       - RTL_UNICODETOTEXT_FLAGS_* bits; also passed on to
                     :  *                handleUndefinedUnicodeToTextChar for unmapped characters.
                     :  * pInfo        - reset to 0, then OR-ed with RTL_UNICODETOTEXT_INFO_* bits.
                     :  * pSrcCvtChars - receives how far pSrcBuf advanced, in characters.
                     :  *
                     :  * Returns the number of bytes written to pDestBuf.
                     :  */
     216      183762 : sal_Size ImplUnicodeToDBCS( const void* pData, SAL_UNUSED_PARAMETER void*,
     217             :                             const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
     218             :                             char* pDestBuf, sal_Size nDestBytes,
     219             :                             sal_uInt32 nFlags, sal_uInt32* pInfo,
     220             :                             sal_Size* pSrcCvtChars )
     221             : {
     222             :     sal_uInt16                  cConv;
     223             :     sal_Unicode                 c;
     224             :     unsigned char                   nHighChar;
     225             :     unsigned char                   nLowChar;
     226             :     const ImplUniToDBCSHighTab* pHighEntry;
     227      183762 :     const ImplDBCSConvertData*  pConvertData = (const ImplDBCSConvertData*)pData;
     228      183762 :     const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
     229             :     char*                   pEndDestBuf;
     230             :     const sal_Unicode*          pEndSrcBuf;
     231             : 
     232             :     bool bCheckRange =
     233      183762 :         pConvertData->mnLeadStart != 0 || pConvertData->mnLeadEnd != 0xFF;
     234             :         /* this statement has the effect that this extra check is only done for
     235             :            EUC-KR, which uses the MS-949 tables, but does not support the full
     236             :            range of MS-949 */
     237             : 
     238      183762 :     *pInfo = 0;
     239      183762 :     pEndDestBuf = pDestBuf+nDestBytes;
     240      183762 :     pEndSrcBuf  = pSrcBuf+nSrcChars;
     241      425519 :     while ( pSrcBuf < pEndSrcBuf )
     242             :     {
     243      184580 :         c = *pSrcBuf;
     244      184580 :         nHighChar = (unsigned char)((c >> 8) & 0xFF);
     245      184580 :         nLowChar = (unsigned char)(c & 0xFF);
     246             : 
     247             :         /* get entry for the high byte */
     248      184580 :         pHighEntry = pHighTab+nHighChar;
     249             : 
     250             :         /* is low byte in the table range */
     251      184580 :         if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
     252             :         {
     253       53113 :             cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
     254      106226 :             if (bCheckRange && cConv > 0x7F
     255          92 :                 && ((cConv >> 8) < pConvertData->mnLeadStart
     256          72 :                     || (cConv >> 8) > pConvertData->mnLeadEnd
     257          72 :                     || (cConv & 0xFF) < pConvertData->mnTrailStart
     258          72 :                     || (cConv & 0xFF) > pConvertData->mnTrailEnd))
     259          20 :                 cConv = 0; /* result lies outside the configured lead/trail limits: treat as unmapped */
     260             :         }
     261             :         else
     262      131467 :             cConv = 0;
     263             : 
     264      184580 :         if (cConv == 0 && c != 0)
     265             :         {
     266             :             /* Map to EUDC ranges: the first entry whose mnUniStart..mnUniEnd contains c is used */
     267      136442 :             ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
     268             :             sal_uInt32 i;
     269      449863 :             for (i = 0; i < pConvertData->mnEUDCCount; ++i)
     270             :             {
     271      323251 :                 if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
     272             :                 {
     273        9830 :                     sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
     274             :                     sal_uInt32 nLeadOff
     275        9830 :                         = nIndex / pEUDCTab->mnTrailRangeCount;
     276             :                     sal_uInt32 nTrailOff
     277        9830 :                         = nIndex % pEUDCTab->mnTrailRangeCount;
     278             :                     sal_uInt32 nSize;
     279             :                     cConv = (sal_uInt16)
     280        9830 :                                 ((pEUDCTab->mnLeadStart + nLeadOff) << 8);
     281             :                     nSize
     282        9830 :                         = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
     283        9830 :                     if (nTrailOff < nSize)
     284             :                     {
     285        4869 :                         cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
     286        4869 :                         break;
     287             :                     }
     288        4961 :                     nTrailOff -= nSize;
     289             :                     nSize
     290        4961 :                         = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
     291        4961 :                     if (nTrailOff < nSize)
     292             :                     {
     293        4947 :                         cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
     294        4947 :                         break;
     295             :                     }
     296          14 :                     nTrailOff -= nSize;
     297          14 :                     cConv |= pEUDCTab->mnTrail3Start + nTrailOff; /* NOTE(review): trail range 3 is used without consulting
                     :                        mnTrailCount - presumably the table guarantees that the range exists, confirm */
     298          14 :                     break;
     299             :                 }
     300      313421 :                 pEUDCTab++;
     301             :             }
     302             : 
     303             :             /* FIXME
     304             :              * SB: Not sure why this is in here.  Plus, it does not work as
     305             :              * intended when (c & 0xFF) == 0, because the next !cConv check
     306             :              * will then think c has not yet been converted...
     307             :              */
     308      136442 :             if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
     309        9259 :                 && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
     310             :             {
     311         256 :                 if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
     312           0 :                     cConv = static_cast< char >(static_cast< unsigned char >(c & 0xFF)); /* keeps only the low byte of c */
     313             :             }
     314             :         }
     315             : 
     316      184580 :         if ( !cConv )
     317             :         {
     318      126617 :             if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
     319             :             {
     320             :                 /* !!! (empty branch) */
     321             :             }
     322             : 
     323      126617 :             if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
     324             :             {
     325             :                 /* !!! (empty branch) */
     326             :             }
     327             : 
     328             :             /* Handle undefined and surrogates characters */
     329             :             /* (all surrogates characters are undefined) */
     330      126617 :             if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
     331             :                     &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf, nFlags,
     332             :                     pInfo))
     333          32 :                 continue; /* the helper receives both buffer pointers by address; presumably it advanced them - TODO confirm */
     334             :             else
     335      126585 :                 break;
     336             :         }
     337             : 
     338             :         /* SingleByte: the high byte of cConv is zero */
     339       57963 :         if ( !(cConv & 0xFF00) )
     340             :         {
     341         474 :             if ( pDestBuf == pEndDestBuf )
     342             :             {
     343           0 :                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
     344           0 :                 break;
     345             :             }
     346             : 
     347         474 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
     348         474 :             pDestBuf++;
     349             :         }
     350             :         else
     351             :         {
     352       57489 :             if ( pDestBuf+1 >= pEndDestBuf ) /* two free bytes are required */
     353             :             {
     354           0 :                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
     355           0 :                 break;
     356             :             }
     357             : /* DoubleByte: the high byte of cConv is written first, then the low byte */
     358       57489 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
     359       57489 :             pDestBuf++;
     360       57489 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
     361       57489 :             pDestBuf++;
     362             :         }
     363             : 
     364       57963 :         pSrcBuf++;
     365             :     }
     366             : /* Both results are the distance the respective pointer advanced. */
     367      183762 :     *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
     368      183762 :     return (nDestBytes - (pEndDestBuf-pDestBuf));
     369             : }
     370             : 
     371             : /* ======================================================================= */
     372             : 
     373             : #define JIS_EUC_LEAD_OFF                                        0x80 /* subtracted from an EUC-JP lead byte before it indexes the lead table */
     374             : #define JIS_EUC_TRAIL_OFF                                       0x80 /* subtracted from an EUC-JP trail byte before it is compared with the trail range */
     375             : 
     376             : /* ----------------------------------------------------------------------- */
     377             : /* ImplEUCJPToUnicode: converts an EUC-JP encoded byte buffer to sal_Unicode
                     :  * values. Bytes up to 0x7F are copied as they are, 0x8E introduces a
                     :  * half-width katakana byte, 0x8F introduces a two-byte sequence looked up
                     :  * in the JIS 0212 tables, and any other byte starts a two-byte sequence
                     :  * looked up in the JIS 0208 tables.
                     :  *
                     :  * pData        - cast to const ImplEUCJPConvertData*; supplies the two
                     :  *                lead tables.
                     :  * (2nd param)  - unused.
                     :  * pSrcBuf      - source bytes; nSrcBytes is their count.
                     :  * pDestBuf     - receives the converted characters; nDestChars is its
                     :  *                capacity in sal_Unicode units.
                     :  * nFlags       - RTL_TEXTTOUNICODE_FLAGS_* bits selecting error, ignore or
                     :  *                replacement handling for invalid/undefined input.
                     :  * pInfo        - reset to 0, then OR-ed with RTL_TEXTTOUNICODE_INFO_* bits.
                     :  * pSrcCvtBytes - receives how far pSrcBuf advanced, in bytes.
                     :  *
                     :  * Returns the number of sal_Unicode values written to pDestBuf.
                     :  */
     378         648 : sal_Size ImplEUCJPToUnicode( const void* pData,
     379             :                              SAL_UNUSED_PARAMETER void*,
     380             :                              const char* pSrcBuf, sal_Size nSrcBytes,
     381             :                              sal_Unicode* pDestBuf, sal_Size nDestChars,
     382             :                              sal_uInt32 nFlags, sal_uInt32* pInfo,
     383             :                              sal_Size* pSrcCvtBytes )
     384             : {
     385             :     unsigned char                   c;
     386         648 :     unsigned char                   cLead = '\0';
     387         648 :     unsigned char                   cTrail = '\0';
     388             :     sal_Unicode                 cConv;
     389             :     const ImplDBCSToUniLeadTab* pLeadEntry;
     390             :     const ImplDBCSToUniLeadTab* pLeadTab;
     391         648 :     const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
     392             :     sal_Unicode*                pEndDestBuf;
     393             :     const char*             pEndSrcBuf;
     394             : 
     395         648 :     *pInfo = 0;
     396         648 :     pEndDestBuf = pDestBuf+nDestChars;
     397         648 :     pEndSrcBuf  = pSrcBuf+nSrcBytes;
     398        1928 :     while ( pSrcBuf < pEndSrcBuf )
     399             :     {
     400         958 :         c = (unsigned char)*pSrcBuf;
     401             : 
     402             :         /* ASCII: copied through unchanged */
     403         958 :         if ( c <= 0x7F )
     404           0 :             cConv = c;
     405             :         else
     406             :         {
     407             :             /* SS2 - Half-width katakana */
     408             :             /* 8E + A1-DF */
     409         958 :             if ( c == 0x8E )
     410             :             {
     411             :                 /* Source buffer too small. NOTE(review): unlike ImplDBCSToUnicode, this function sets
                     :                    SRCBUFFERTOSMALL without RTL_TEXTTOUNICODE_INFO_ERROR - confirm that this is intended */
     412           2 :                 if ( pSrcBuf + 1 == pEndSrcBuf )
     413             :                 {
     414           2 :                     *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
     415           2 :                     break;
     416             :                 }
     417             : 
     418           0 :                 pSrcBuf++;
     419           0 :                 c = (unsigned char)*pSrcBuf;
     420           0 :                 if ( (c >= 0xA1) && (c <= 0xDF) )
     421           0 :                     cConv = 0xFF61+(c-0xA1);
     422             :                 else
     423             :                 {
     424           0 :                     cConv = 0;
     425           0 :                     cLead = 0x8E;
     426           0 :                     cTrail = c;
     427             :                 }
     428             :             }
     429             :             else
     430             :             {
     431             :                 /* SS3 - JIS 0212-1990 */
     432             :                 /* 8F + A1-FE + A1-FE */
     433         956 :                 if ( c == 0x8F )
     434             :                 {
     435             :                     /* Source buffer too small (the 0x8F prefix and two more bytes are required) */
     436           4 :                     if (pEndSrcBuf - pSrcBuf < 3)
     437             :                     {
     438           4 :                         *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
     439           4 :                         break;
     440             :                     }
     441             : 
     442           0 :                     pSrcBuf++;
     443           0 :                     cLead = (unsigned char)*pSrcBuf;
     444           0 :                     pSrcBuf++;
     445           0 :                     cTrail = (unsigned char)*pSrcBuf;
     446           0 :                     pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
     447             :                 }
     448             :                 /* CodeSet 2 JIS 0208-1997 */
     449             :                 /* A1-FE + A1-FE */
     450             :                 else
     451             :                 {
     452             :                     /* Source buffer too small */
     453         952 :                     if ( pSrcBuf + 1 == pEndSrcBuf )
     454             :                     {
     455         320 :                         *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
     456         320 :                         break;
     457             :                     }
     458             : 
     459         632 :                     cLead = c;
     460         632 :                     pSrcBuf++;
     461         632 :                     cTrail = (unsigned char)*pSrcBuf;
     462         632 :                     pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
     463             :                 }
     464             : 
     465             :                 /* Undefined Range: either byte lacks the 0x80 offset, so no table lookup is attempted */
     466         632 :                 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
     467           0 :                     cConv = 0;
     468             :                 else
     469             :                 {
     470         632 :                     cLead   -= JIS_EUC_LEAD_OFF;
     471         632 :                     cTrail  -= JIS_EUC_TRAIL_OFF;
     472         632 :                     pLeadEntry = pLeadTab+cLead;
     473         632 :                     if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
     474         632 :                         cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
     475             :                     else
     476           0 :                         cConv = 0;
     477             :                 }
     478             :             }
     479             : 
     480         632 :             if ( !cConv )
     481             :             {
     482             :                 /* We compare the full range of the trail we defined, */
     483             :                 /* which can often be greater than the limit. We do this */
     484             :                 /* so that extensions that don't consider encodings */
     485             :                 /* correctly treat double-byte characters as a single */
     486             :                 /* character as much as possible. */
     487             : /* NOTE(review): when the table lookup above ran, cLead and cTrail have
                     :    already had JIS_EUC_LEAD_OFF / JIS_EUC_TRAIL_OFF subtracted, so both
                     :    are below 0x80 here and the INVALID branch is taken; the MBUNDEFINED
                     :    branch then looks reachable only from the 0x8E path - confirm that
                     :    this is intended. */
     488           0 :                 if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
     489             :                 {
     490           0 :                     *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
     491           0 :                     if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
     492             :                     {
     493           0 :                         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
     494           0 :                         break;
     495             :                     }
     496           0 :                     else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
     497             :                     {
     498           0 :                         pSrcBuf++;
     499           0 :                         continue;
     500             :                     }
     501             :                     else
     502           0 :                         cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
     503             :                 }
     504             :                 else
     505             :                 {
     506           0 :                     *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
     507           0 :                     if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
     508             :                     {
     509           0 :                         *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
     510           0 :                         break;
     511             :                     }
     512           0 :                     else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
     513             :                     {
     514           0 :                         pSrcBuf++;
     515           0 :                         continue;
     516             :                     }
     517             :                     else
     518           0 :                         cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
     519             :                 }
     520             :             }
     521             :         }
     522             : /* Output is checked only after the character was decoded. For a multi-byte
                     :    sequence pSrcBuf has already moved past its first byte, so stopping
                     :    here reports those leading bytes as consumed although nothing was
                     :    written for them. NOTE(review): confirm that callers cope with this. */
     523         632 :         if ( pDestBuf == pEndDestBuf )
     524             :         {
     525           0 :             *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
     526           0 :             break;
     527             :         }
     528             : 
     529         632 :         *pDestBuf = cConv;
     530         632 :         pDestBuf++;
     531         632 :         pSrcBuf++;
     532             :     }
     533             : /* Both results are the distance the respective pointer advanced. */
     534         648 :     *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
     535         648 :     return (nDestChars - (pEndDestBuf-pDestBuf));
     536             : }
     537             : 
     538             : /* ----------------------------------------------------------------------- */
     539             : /* ImplUnicodeToEUCJP: encode a run of sal_Unicode code units as bytes.
                        *
                        * Output produced per source unit c (see the write-out code below):
                        *   - c <= 0x7F            : one byte, the value of c.
                        *   - 0xFF61 <= c <= 0xFF9F: two bytes, 0x8E then 0xA1 + (c - 0xFF61).
                        *   - JIS 0208 table hit   : two bytes, table value with 0x8080 OR-ed in.
                        *   - JIS 0212 table hit   : three bytes, table value with 0x8F8080
                        *                            OR-ed in (so the first byte is 0x8F).
                        *   - otherwise            : delegated to
                        *                            handleUndefinedUnicodeToTextChar.
                        *
                        * pData        - cast to ImplEUCJPConvertData; supplies
                        *                mpUniToJIS0208HighTab and mpUniToJIS0212HighTab.
                        * (2nd param)  - unnamed and unused.
                        * pSrcBuf      - source code units; nSrcChars is their count.
                        * pDestBuf     - output bytes; nDestBytes is the capacity.
                        * nFlags       - passed on to handleUndefinedUnicodeToTextChar.
                        * pInfo        - set to 0 on entry; ERROR | DESTBUFFERTOSMALL is OR-ed
                        *                in when the next character does not fit.
                        * pSrcCvtChars - receives nSrcChars minus the units left unread.
                        *
                        * Returns nDestBytes minus the bytes left unused in pDestBuf.
                        */
     540           6 : sal_Size ImplUnicodeToEUCJP( const void* pData,
     541             :                              SAL_UNUSED_PARAMETER void*,
     542             :                              const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
     543             :                              char* pDestBuf, sal_Size nDestBytes,
     544             :                              sal_uInt32 nFlags, sal_uInt32* pInfo,
     545             :                              sal_Size* pSrcCvtChars )
     546             : {
     547             :     sal_uInt32                  cConv;
     548             :     sal_Unicode                 c;
     549             :     unsigned char                   nHighChar;
     550             :     unsigned char                   nLowChar;
     551             :     const ImplUniToDBCSHighTab* pHighEntry;
     552             :     const ImplUniToDBCSHighTab* pHighTab;
     553           6 :     const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
     554             :     char*                   pEndDestBuf;
     555             :     const sal_Unicode*          pEndSrcBuf;
     556             : 
     557           6 :     *pInfo = 0;
     558           6 :     pEndDestBuf = pDestBuf+nDestBytes;
     559           6 :     pEndSrcBuf  = pSrcBuf+nSrcChars;
     560         164 :     while ( pSrcBuf < pEndSrcBuf )
     561             :     {
     562         152 :         c = *pSrcBuf;
     563             : 
     564             :         /* ASCII: values up to 0x7F are passed through unchanged */
     565         152 :         if ( c <= 0x7F )
     566           0 :             cConv = c;
     567             :         /* Half-width katakana: 0xFF61..0xFF9F become 0x8EA1 + offset */
     568         152 :         else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
     569           0 :             cConv = 0x8E00+0xA1+(c-0xFF61);
     570             :         else
     571             :         {
     572         152 :             nHighChar = (unsigned char)((c >> 8) & 0xFF);
     573         152 :             nLowChar = (unsigned char)(c & 0xFF);
     574             : 
     575             :             /* JIS 0208: the high byte of c selects a table row, the low */
                       /* byte must lie within that row's [mnLowStart, mnLowEnd] */
     576         152 :             pHighTab = pConvertData->mpUniToJIS0208HighTab;
     577         152 :             pHighEntry = pHighTab+nHighChar;
     578         152 :             if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
     579             :             {
     580         152 :                 cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
     581         304 :                 if (cConv != 0) /* a table value of 0 is treated as "no mapping" */
     582         150 :                     cConv |= 0x8080;
     583             :             }
     584             :             else
     585           0 :                 cConv = 0;
     586             : 
     587             :             /* JIS 0212: tried only when the JIS 0208 lookup yielded 0 */
     588         152 :             if ( !cConv )
     589             :             {
     590           2 :                 pHighTab = pConvertData->mpUniToJIS0212HighTab;
     591           2 :                 pHighEntry = pHighTab+nHighChar;
     592           2 :                 if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
     593             :                 {
     594           0 :                     cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
     595           0 :                     if (cConv != 0)
     596           0 :                         cConv |= 0x8F8080;
     597             :                 }
     598             : 
     599           2 :                 if ( !cConv )
     600             :                 {
     601           2 :                     if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
     602             :                     {
     603             :                         /* !!! NOTE(review): empty branch; the flag is tested but nothing is done here */
     604             :                     }
     605             : 
     606           2 :                     if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
     607             :                     {
     608             :                         /* !!! NOTE(review): empty branch; the flag is tested but nothing is done here */
     609             :                     }
     610             : 
     611             :                     /* Handle undefined and surrogate characters */
     612             :                     /* (all surrogate characters are undefined) */
     613           2 :                     if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
     614             :                             &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf,
     615             :                             nFlags, pInfo))
     616           2 :                         continue; /* pSrcBuf is not incremented here; presumably the helper advanced it - verify */
     617             :                     else
     618           0 :                         break;
     619             :                 }
     620             :             }
     621             :         }
     622             : 
     623             :         /* SingleByte: nothing set above the low 8 bits of cConv */
     624         150 :         if ( !(cConv & 0xFFFF00) )
     625             :         {
     626           0 :             if ( pDestBuf == pEndDestBuf )
     627             :             {
     628           0 :                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
     629           0 :                 break;
     630             :             }
     631             : 
     632           0 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
     633           0 :             pDestBuf++;
     634             :         }
     635             :         /* DoubleByte: bits 8..15 set, bits 16..23 clear; high byte is written first */
     636         150 :         else if ( !(cConv & 0xFF0000) )
     637             :         {
     638         150 :             if ( pDestBuf+1 >= pEndDestBuf ) /* fewer than 2 bytes left */
     639             :             {
     640           0 :                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
     641           0 :                 break;
     642             :             }
     643             : 
     644         150 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
     645         150 :             pDestBuf++;
     646         150 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
     647         150 :             pDestBuf++;
     648             :         }
     649             :         else /* three bytes: cConv has bits set in 0xFF0000 */
     650             :         {
     651             :             /* NOTE(review): fewer than 3 bytes left. When pDestBuf == pEndDestBuf */
                       /* this forms a pointer beyond pEndDestBuf before comparing; testing */
                       /* (pEndDestBuf - pDestBuf) < 3 would avoid that - confirm. */
     651           0 :             if ( pDestBuf+2 >= pEndDestBuf )
     652             :             {
     653           0 :                 *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
     654           0 :                 break;
     655             :             }
     656             : 
     657           0 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 16) & 0xFF));
     658           0 :             pDestBuf++;
     659           0 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
     660           0 :             pDestBuf++;
     661           0 :             *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
     662           0 :             pDestBuf++;
     663             :         }
     664             : 
     665         150 :         pSrcBuf++;
     666             :     }
     667             : 
     668           6 :     *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
     669           6 :     return (nDestBytes - (pEndDestBuf-pDestBuf));
     670             : }
     671             : 
     672             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10