LCOV - code coverage report
Current view: top level - sal/textenc - tencinfo.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 280 288 97.2 %
Date: 2014-11-03 Functions: 14 14 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "sal/config.h"
      21             : 
      22             : #include <cstddef>
      23             : #include <cstring>
      24             : 
      25             : #include "rtl/tencinfo.h"
      26             : 
      27             : #include "gettextencodingdata.hxx"
      28             : #include "tenchelp.hxx"
      29             : #include <boost/scoped_array.hpp>
      30             : 
                       /* Report whether nEncoding lies strictly above RTL_TEXTENCODING_DONTKNOW,
                          at or below RTL_TEXTENCODING_ADOBE_DINGBATS, and is not the value 9.
                          NOTE(review): presumably ADOBE_DINGBATS is the highest-numbered octet
                          encoding, so the upper bound has to be moved whenever a new encoding is
                          appended -- confirm against the encoding list in the rtl headers. */
      31        2222 : sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
      32             : {
      33             :     return
      34             :         nEncoding > RTL_TEXTENCODING_DONTKNOW
      35        2222 :         && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM
      36        4444 :         && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // always update this!
      37             : }
      38             : 
      39             : /* ======================================================================= */
      40             : 
                       /* Copy the NUL-terminated string pName into pBuf, converting the ASCII
                          letters A-Z to lower case and leaving every other character unchanged.
                          pBuf is written without any bounds check, so the caller has to provide
                          room for at least strlen(pName)+1 characters. */
      41         220 : static void Impl_toAsciiLower( const char* pName, char* pBuf )
      42             : {
      43        2336 :     while ( *pName )
      44             :     {
      45             :         /* ASCII upper case A-Z (0x41..0x5A) */
      46        1896 :         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
      47         180 :             *pBuf = (*pName)+0x20;  /* adding 0x20 yields the lower-case letter */
      48             :         else
      49        1716 :             *pBuf = *pName;
      50             : 
      51        1896 :         pBuf++;
      52        1896 :         pName++;
      53             :     }
      54             : 
                           /* terminate the copy */
      55         220 :     *pBuf = '\0';
      56         220 : }
      57             : 
      58             : /* ----------------------------------------------------------------------- */
      59             : 
                       /* Copy pName into pBuf, keeping only ASCII letters and digits: A-Z is
                          stored as lower case, a-z and 0-9 are stored unchanged, and every other
                          character is dropped. The result is NUL-terminated and never longer
                          than the input; pBuf is written without a bounds check, so room for
                          strlen(pName)+1 characters is always sufficient. */
      60        6465 : static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
      61             : {
      62       61513 :     while ( *pName )
      63             :     {
      64             :         /* ASCII upper case A-Z (0x41..0x5A) */
      65       48583 :         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
      66             :         {
      67         544 :             *pBuf = (*pName)+0x20;  /* adding 0x20 yields the lower-case letter */
      68         544 :             pBuf++;
      69             :         }
      70             :         /* a-z (0x61..0x7A) and 0-9 (0x30..0x39) are copied as they are */
      71       68243 :         else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) ||
      72       33973 :                   ((*pName >= 0x30) && (*pName <= 0x39)) )
      73             :         {
      74       41576 :             *pBuf = *pName;
      75       41576 :             pBuf++;
      76             :         }
                               /* anything else is skipped: pBuf does not advance */
      77             : 
      78       48583 :         pName++;
      79             :     }
      80             : 
      81        6465 :     *pBuf = '\0';
      82        6465 : }
      83             : 
      84             : /* ----------------------------------------------------------------------- */
      85             : 
      86             : /* Prefix test: true when pCompStr begins with all of pMatchStr */
                       /* (an empty pMatchStr therefore matches every pCompStr). Characters of
                          pCompStr that follow the matched prefix are not examined. */
      87       22794 : static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
      88             : {
      89             :     /* Only the end of pMatchStr is tested: the terminating NUL of pCompStr */
      90             :     /* differs from any pMatchStr character still left, so the loop ends    */
      91       55850 :     while ( *pMatchStr )
      92             :     {
      93       32586 :         if ( *pCompStr != *pMatchStr )
      94       22324 :             return false;
      95             : 
      96       10262 :         pCompStr++;
      97       10262 :         pMatchStr++;
      98             :     }
      99             : 
     100         470 :     return true;
     101             : }
     102             : 
     103             : /* ======================================================================= */
     104             : 
                       /* One row of a name-to-encoding lookup table. The tables in this file end
                          with a row whose mpCharsetStr is NULL; rtl_getTextEncodingFromUnixCharset
                          returns that row's meTextEncoding when no earlier row matched. */
     105             : struct ImplStrCharsetDef
     106             : {
     107             :     const char*             mpCharsetStr;   /* name (fragment) to match; NULL marks the end of a table */
     108             :     rtl_TextEncoding            meTextEncoding; /* encoding reported for this row */
     109             : };
     110             : 
                       /* One row of the first-level table used by rtl_getTextEncodingFromUnixCharset:
                          it pairs the part of a charset name in front of the first '-' with the
                          table that resolves the part behind it. A row with a NULL mpCharsetStr
                          ends the table. */
     111             : struct ImplStrFirstPartCharsetDef
     112             : {
     113             :     const char*             mpCharsetStr;       /* first part of the name; NULL marks the end of the table */
     114             :     const ImplStrCharsetDef*    mpSecondPartTab; /* rows tried against the second part */
     115             : };
     116             : 
     117             : /* ======================================================================= */
     118             : 
                       /* Fill *pEncInfo with the character-size data and flags of eTextEncoding.
                          pEncInfo is dereferenced unconditionally and its StructSize member is
                          only read: it decides how many members are written. MinimumCharSize is
                          written for StructSize >= 5, MaximumCharSize for >= 6, AverageCharSize
                          for >= 7 and Flags for >= 12 (presumably the byte offsets at which each
                          member ends -- TODO confirm against the struct declaration).

                          With data available the result is false only for StructSize < 5.
                          Without data the members are set to 1/1/1/0 and the result is false for
                          StructSize < 5 and for StructSize >= 12, but true for 5..11.
                          NOTE(review): that asymmetry on the fallback path looks unintended;
                          confirm before relying on the return value for unknown encodings. */
     119     1100634 : sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
     120             : {
     121             :     const ImplTextEncodingData* pData;
     122             : 
     123     1100634 :     pData = Impl_getTextEncodingData( eTextEncoding );
     124     1100634 :     if ( !pData )
     125             :     {
     126             :         /* HACK: For not implemented encoding, because not all
     127             :            calls handle the errors */
     128        1394 :         if ( pEncInfo->StructSize < 5 )
     129           2 :             return false;
     130        1392 :         pEncInfo->MinimumCharSize = 1;
     131             : 
     132        1392 :         if ( pEncInfo->StructSize < 6 )
     133           2 :             return true;
     134        1390 :         pEncInfo->MaximumCharSize = 1;
     135             : 
     136        1390 :         if ( pEncInfo->StructSize < 7 )
     137           2 :             return true;
     138        1388 :         pEncInfo->AverageCharSize = 1;
     139             : 
     140        1388 :         if ( pEncInfo->StructSize < 12 )
     141           2 :             return true;
     142        1386 :         pEncInfo->Flags = 0;
     143             : 
                               /* every member carries a placeholder, yet the lookup itself failed */
     144        1386 :         return false;
     145             :     }
     146             : 
                           /* Data available: same StructSize ladder, values taken from pData */
     147     1099240 :     if ( pEncInfo->StructSize < 5 )
     148           2 :         return false;
     149     1099238 :     pEncInfo->MinimumCharSize = pData->mnMinCharSize;
     150             : 
     151     1099238 :     if ( pEncInfo->StructSize < 6 )
     152           2 :         return true;
     153     1099236 :     pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
     154             : 
     155     1099236 :     if ( pEncInfo->StructSize < 7 )
     156           2 :         return true;
     157     1099234 :     pEncInfo->AverageCharSize = pData->mnAveCharSize;
     158             : 
     159     1099234 :     if ( pEncInfo->StructSize < 12 )
     160           2 :         return true;
     161     1099232 :     pEncInfo->Flags = pData->mnInfoFlags;
     162             : 
     163     1099232 :     return true;
     164             : }
     165             : 
     166             : /* ======================================================================= */
     167             : 
                       /* Translate a Windows charset number into an rtl_TextEncoding. The comment
                          behind each case names the Windows charset constant the number stands
                          for. Any number that is not listed yields RTL_TEXTENCODING_DONTKNOW. */
     168       45368 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
     169             : {
     170             :     rtl_TextEncoding eTextEncoding;
     171             : 
                           /* every path through the switch, including default, assigns eTextEncoding */
     172       45368 :     switch ( nWinCharset )
     173             :     {
     174       26362 :         case 0:     eTextEncoding = RTL_TEXTENCODING_MS_1252; break;    /* ANSI_CHARSET */
     175        4314 :         case 2:     eTextEncoding = RTL_TEXTENCODING_SYMBOL; break;     /* SYMBOL_CHARSET */
     176          16 :         case 77:    eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
     177         856 :         case 128:   eTextEncoding = RTL_TEXTENCODING_MS_932; break;     /* SHIFTJIS_CHARSET */
     178          12 :         case 129:   eTextEncoding = RTL_TEXTENCODING_MS_949; break;     /* HANGEUL_CHARSET */
     179           0 :         case 130:   eTextEncoding = RTL_TEXTENCODING_MS_1361; break;    /* JOHAB_CHARSET */
     180         732 :         case 134:   eTextEncoding = RTL_TEXTENCODING_MS_936; break;     /* GB2312_CHARSET */
     181          14 :         case 136:   eTextEncoding = RTL_TEXTENCODING_MS_950; break;     /* CHINESEBIG5_CHARSET */
     182        1576 :         case 161:   eTextEncoding = RTL_TEXTENCODING_MS_1253; break;    /* GREEK_CHARSET */
     183           0 :         case 162:   eTextEncoding = RTL_TEXTENCODING_MS_1254; break;    /* TURKISH_CHARSET */
     184           0 :         case 163:   eTextEncoding = RTL_TEXTENCODING_MS_1258; break;    /* VIETNAMESE_CHARSET !!! */
     185          18 :         case 177:   eTextEncoding = RTL_TEXTENCODING_MS_1255; break;    /* HEBREW_CHARSET */
     186        2460 :         case 178:   eTextEncoding = RTL_TEXTENCODING_MS_1256; break;    /* ARABIC_CHARSET */
     187           2 :         case 186:   eTextEncoding = RTL_TEXTENCODING_MS_1257; break;    /* BALTIC_CHARSET */
     188          64 :         case 204:   eTextEncoding = RTL_TEXTENCODING_MS_1251; break;    /* RUSSIAN_CHARSET */
     189           8 :         case 222:   eTextEncoding = RTL_TEXTENCODING_MS_874; break;     /* THAI_CHARSET */
     190        3266 :         case 238:   eTextEncoding = RTL_TEXTENCODING_MS_1250; break;    /* EASTEUROPE_CHARSET */
     191           0 :         case 255:   eTextEncoding = RTL_TEXTENCODING_IBM_850; break;    /* OEM_CHARSET */
     192        5668 :         default:    eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
     193             :     }
     194             : 
     195       45368 :     return eTextEncoding;
     196             : }
     197             : 
     198             : /* ----------------------------------------------------------------------- */
     199             : 
                       /* Map a "firstpart-secondpart" charset name to an rtl_TextEncoding.

                          pUnixCharset must be a valid NUL-terminated string (it is passed to
                          strlen). A lower-cased copy is split at its first '-'. The first part
                          selects a row of aUnixCharsetFirstPartTab, and the second part is then
                          looked up in that row's table. Both lookups take the first row whose
                          string is a prefix of the input (see Impl_matchString), so the order
                          of the rows matters.

                          Returns RTL_TEXTENCODING_DONTKNOW when the name has no '-' or no
                          first-part row matches. When only the second part is unknown, the
                          encoding stored in the terminating row of the selected table is
                          returned. */
     200         220 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
     201             : {
     202             :     /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
     203             :      * (Registry and Encoding) Names").
     204             :      */
     205             : 
     206             :     /* All identifiers in the tables are lower case. The function searches */
     207             :     /* for the first matching string in the tables. */
     208             :     /* Sort order: unique (first 14, then 1), important */
     209             : 
     210             :     static ImplStrCharsetDef const aUnixCharsetISOTab[] =
     211             :     {
     212             :         { "15", RTL_TEXTENCODING_ISO_8859_15 },
     213             :         { "14", RTL_TEXTENCODING_ISO_8859_14 },
     214             :         { "13", RTL_TEXTENCODING_ISO_8859_13 },
     215             :         { "11", RTL_TEXTENCODING_TIS_620 },
     216             :         { "10", RTL_TEXTENCODING_ISO_8859_10 },
     217             :         { "1", RTL_TEXTENCODING_ISO_8859_1 },
     218             :         { "2", RTL_TEXTENCODING_ISO_8859_2 },
     219             :         { "3", RTL_TEXTENCODING_ISO_8859_3 },
     220             :         { "4", RTL_TEXTENCODING_ISO_8859_4 },
     221             :         { "5", RTL_TEXTENCODING_ISO_8859_5 },
     222             :         { "6", RTL_TEXTENCODING_ISO_8859_6 },
     223             :         { "7", RTL_TEXTENCODING_ISO_8859_7 },
     224             :         { "8", RTL_TEXTENCODING_ISO_8859_8 },
     225             :         { "9", RTL_TEXTENCODING_ISO_8859_9 },
     226             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     227             :     };
     228             : 
     229             :     static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
     230             :     {
     231             :         { "fontspecific", RTL_TEXTENCODING_SYMBOL },
     232             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     233             :     };
     234             : 
     235             :     static ImplStrCharsetDef const aUnixCharsetMSTab[] =
     236             :     {
     237             :         { "1252", RTL_TEXTENCODING_MS_1252 },
     238             :         { "1250", RTL_TEXTENCODING_MS_1250 },
     239             :         { "1251", RTL_TEXTENCODING_MS_1251 },
     240             :         { "1253", RTL_TEXTENCODING_MS_1253 },
     241             :         { "1254", RTL_TEXTENCODING_MS_1254 },
     242             :         { "1255", RTL_TEXTENCODING_MS_1255 },
     243             :         { "1256", RTL_TEXTENCODING_MS_1256 },
     244             :         { "1257", RTL_TEXTENCODING_MS_1257 },
     245             :         { "1258", RTL_TEXTENCODING_MS_1258 },
     246             :         { "932", RTL_TEXTENCODING_MS_932 },
     247             :         { "936", RTL_TEXTENCODING_MS_936 },
     248             :         { "949", RTL_TEXTENCODING_MS_949 },
     249             :         { "950", RTL_TEXTENCODING_MS_950 },
     250             :         { "1361", RTL_TEXTENCODING_MS_1361 },
     251             :         { "cp1252", RTL_TEXTENCODING_MS_1252 },
     252             :         { "cp1250", RTL_TEXTENCODING_MS_1250 },
     253             :         { "cp1251", RTL_TEXTENCODING_MS_1251 },
     254             :         { "cp1253", RTL_TEXTENCODING_MS_1253 },
     255             :         { "cp1254", RTL_TEXTENCODING_MS_1254 },
     256             :         { "cp1255", RTL_TEXTENCODING_MS_1255 },
     257             :         { "cp1256", RTL_TEXTENCODING_MS_1256 },
     258             :         { "cp1257", RTL_TEXTENCODING_MS_1257 },
     259             :         { "cp1258", RTL_TEXTENCODING_MS_1258 },
     260             :         { "cp932", RTL_TEXTENCODING_MS_932 },
     261             :         { "cp936", RTL_TEXTENCODING_MS_936 },
     262             :         { "cp949", RTL_TEXTENCODING_MS_949 },
     263             :         { "cp950", RTL_TEXTENCODING_MS_950 },
     264             :         { "cp1361", RTL_TEXTENCODING_MS_1361 },
     265             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     266             :     };
     267             : 
     268             :     static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
     269             :     {
     270             :         { "437", RTL_TEXTENCODING_IBM_437 },
     271             :         { "850", RTL_TEXTENCODING_IBM_850 },
     272             :         { "860", RTL_TEXTENCODING_IBM_860 },
     273             :         { "861", RTL_TEXTENCODING_IBM_861 },
     274             :         { "863", RTL_TEXTENCODING_IBM_863 },
     275             :         { "865", RTL_TEXTENCODING_IBM_865 },
     276             :         { "737", RTL_TEXTENCODING_IBM_737 },
     277             :         { "775", RTL_TEXTENCODING_IBM_775 },
     278             :         { "852", RTL_TEXTENCODING_IBM_852 },
     279             :         { "855", RTL_TEXTENCODING_IBM_855 },
     280             :         { "857", RTL_TEXTENCODING_IBM_857 },
     281             :         { "862", RTL_TEXTENCODING_IBM_862 },
     282             :         { "864", RTL_TEXTENCODING_IBM_864 },
     283             :         { "866", RTL_TEXTENCODING_IBM_866 },
     284             :         { "869", RTL_TEXTENCODING_IBM_869 },
     285             :         { "874", RTL_TEXTENCODING_MS_874 },
     286             :         { "1004", RTL_TEXTENCODING_MS_1252 },
     287             :         { "65400", RTL_TEXTENCODING_SYMBOL },
     288             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     289             :     };
     290             : 
     291             :     static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
     292             :     {
     293             :         { "r", RTL_TEXTENCODING_KOI8_R },
     294             :         { "u", RTL_TEXTENCODING_KOI8_U },
     295             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     296             :     };
     297             : 
                           /* The following tables hold nothing but a terminating row, so the
                              second part of the name is ignored and the row's encoding is
                              returned for any second part. */
     298             :     static ImplStrCharsetDef const aUnixCharsetJISX0208Tab[] =
     299             :     {
     300             :         { NULL, RTL_TEXTENCODING_JIS_X_0208 }
     301             :     };
     302             : 
     303             :     static ImplStrCharsetDef const aUnixCharsetJISX0201Tab[] =
     304             :     {
     305             :         { NULL, RTL_TEXTENCODING_JIS_X_0201 }
     306             :     };
     307             : 
     308             :     static ImplStrCharsetDef const aUnixCharsetJISX0212Tab[] =
     309             :     {
     310             :         { NULL, RTL_TEXTENCODING_JIS_X_0212 }
     311             :     };
     312             : 
     313             :     static ImplStrCharsetDef const aUnixCharsetGBTab[] =
     314             :     {
     315             :         { NULL, RTL_TEXTENCODING_GB_2312 }
     316             :     };
     317             : 
     318             :     static ImplStrCharsetDef const aUnixCharsetGBKTab[] =
     319             :     {
     320             :         { NULL, RTL_TEXTENCODING_GBK }
     321             :     };
     322             : 
     323             :     static ImplStrCharsetDef const aUnixCharsetBIG5Tab[] =
     324             :     {
     325             :         { NULL, RTL_TEXTENCODING_BIG5 }
     326             :     };
     327             : 
     328             :     static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
     329             :     {
     330             :         { NULL, RTL_TEXTENCODING_EUC_KR }
     331             :     };
     332             : 
     333             :     static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
     334             :     {
     335             :         { NULL, RTL_TEXTENCODING_MS_1361 }
     336             :     };
     337             : 
     338             :     static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
     339             :     {
     340             :         { NULL, RTL_TEXTENCODING_UNICODE }
     341             :     };
     342             : 
     343             :     static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
     344             :     {
     345             : /* Currently every Unicode Encoding is for us Unicode */
     346             : /*        { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
     347             :         { NULL, RTL_TEXTENCODING_UNICODE }
     348             :     };
     349             : 
     350             :     static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
     351             :     {
     352             :         { NULL, RTL_TEXTENCODING_SYMBOL }
     353             :     };
     354             : 
     355             :     /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
     356             :        rev=1.1.1.1>: */
     357             :     static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
     358             :     {
     359             :         { "0", RTL_TEXTENCODING_TIS_620 },
     360             :         { "2529", RTL_TEXTENCODING_TIS_620 },
     361             :         { "2533", RTL_TEXTENCODING_TIS_620 },
     362             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     363             :     };
     364             :     static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
     365             :     {
     366             :         { "1", RTL_TEXTENCODING_TIS_620 },
     367             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     368             :     };
     369             :     static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
     370             :     {
     371             :         { "0", RTL_TEXTENCODING_TIS_620 },
     372             :         { "1", RTL_TEXTENCODING_TIS_620 },
     373             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     374             :     };
     375             : 
                           /* First-level table. Rows are tried top to bottom with a prefix
                              match, so a name that starts with an earlier row's string never
                              reaches a later row: "tis620.2529" and "tis620.2533" therefore
                              stand in front of "tis620". */
     376             :     static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
     377             :     {
     378             :         { "iso8859", aUnixCharsetISOTab },
     379             :         { "adobe", aUnixCharsetADOBETab },
     380             :         { "ansi", aUnixCharsetMSTab },
     381             :         { "microsoft", aUnixCharsetMSTab },
     382             :         { "ibm", aUnixCharsetIBMTab },
     383             :         { "koi8", aUnixCharsetKOI8Tab },
     384             :         { "jisx0208", aUnixCharsetJISX0208Tab },
     385             :         { "jisx0208.1983", aUnixCharsetJISX0208Tab },
     386             :         { "jisx0201", aUnixCharsetJISX0201Tab },
     387             :         { "jisx0201.1976", aUnixCharsetJISX0201Tab },
     388             :         { "jisx0212", aUnixCharsetJISX0212Tab },
     389             :         { "jisx0212.1990", aUnixCharsetJISX0212Tab },
     390             :         { "gb2312", aUnixCharsetGBTab },
     391             :         { "gbk", aUnixCharsetGBKTab },
     392             :         { "big5", aUnixCharsetBIG5Tab },
     393             :         { "iso10646", aUnixCharsetISO10646Tab },
     394             : /*      { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
     395             :         { "sunolcursor", aUnixCharsetSymbolTab },
     396             :         { "sunolglyph", aUnixCharsetSymbolTab },
     397             :         { "iso10646", aUnixCharsetUNICODETab }, /* never selected: the "iso10646" row above matches first */
     398             :         { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
     399             :         { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
     400             :         { "tis620.2529", aUnixCharsetTIS6202529Tab },
     401             :         { "tis620.2533", aUnixCharsetTIS6202533Tab },
     402             :         { "tis620", aUnixCharsetTIS620Tab },
     403             : /*        { "sunudcja.1997",  },        */
     404             : /*        { "sunudcko.1997",  },        */
     405             : /*        { "sunudczh.1997",  },        */
     406             : /*        { "sunudczhtw.1997",  },      */
     407             :         { NULL, NULL }
     408             :     };
     409             : 
     410         220 :     rtl_TextEncoding    eEncoding = RTL_TEXTENCODING_DONTKNOW;
     411             :     char*           pTempBuf;
                           /* +1 for the terminating NUL that Impl_toAsciiLower writes.
                              NOTE(review): the strlen result is stored in a sal_UInt32-sized
                              variable (sal_uInt32); if that type is narrower than size_t a very
                              long input would be truncated here -- confirm. */
     412         220 :     sal_uInt32          nBufLen = strlen( pUnixCharset )+1;
     413             :     const char*     pFirstPart;
     414             :     const char*     pSecondPart;
     415             : 
     416             :     /* Allocate a buffer and fill it with the lower-cased name */
     417         220 :     boost::scoped_array<char> pBuf(new char[nBufLen]);
     418         220 :     Impl_toAsciiLower( pUnixCharset, pBuf.get() );
     419             : 
     420             :     /* Split the copy in place at the first '-' */
     421         220 :     pFirstPart = pBuf.get();
     422         220 :     pSecondPart = NULL;
     423         220 :     pTempBuf = pBuf.get();
     424        1810 :     while ( *pTempBuf )
     425             :     {
     426        1582 :         if ( *pTempBuf == '-' )
     427             :         {
     428         212 :             *pTempBuf = '\0';               /* ends the first part */
     429         212 :             pSecondPart = pTempBuf+1;       /* may point at an empty string */
     430         212 :             break;
     431             :         }
     432             : 
     433        1370 :         pTempBuf++;
     434             :     }
     435             : 
     436             :     /* Part separator found */
     437         220 :     if ( pSecondPart )
     438             :     {
     439             :         /* Search for the part tab */
     440         212 :         const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
     441        2114 :         while ( pFirstPartData->mpCharsetStr )
     442             :         {
     443        1852 :             if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
     444             :             {
     445             :                 /* Search for the charset in the second part tab */
     446         162 :                 const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
     447        1348 :                 while ( pData->mpCharsetStr )
     448             :                 {
     449        1172 :                     if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
     450             :                     {
     451         148 :                         break;
     452             :                     }
     453             : 
     454        1024 :                     pData++;
     455             :                 }
     456             : 
     457             :                 /* pData is the matching row or, failing that, the terminating row holding the default for this first part */
     458         162 :                 eEncoding = pData->meTextEncoding;
     459         162 :                 break;
     460             :             }
     461             : 
     462        1690 :             pFirstPartData++;
     463             :         }
     464             :     }
     465             : 
     466         220 :     return eEncoding;
     467             : }
     468             : 
     469             : /* ----------------------------------------------------------------------- */
     470             : 
     471        6465 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
     472             : {
     473             :     /* All Identifiers are in lower case and contain only alphanumeric */
     474             :     /* characters. The function search for the first equal string in */
     475             :     /* the table. In this table are only the most used mime types. */
     476             :     /* Sort order: important */
     477             :     static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
     478             :     {
     479             :         { "usascii", RTL_TEXTENCODING_ASCII_US },
     480             :         { "utf8", RTL_TEXTENCODING_UTF8 },
     481             :         { "utf7", RTL_TEXTENCODING_UTF7 },
     482             :         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
     483             :         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
     484             :         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
     485             :         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
     486             :         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
     487             :         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
     488             :         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
     489             :         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
     490             :         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
     491             :         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
     492             :         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
     493             :         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
     494             :         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
     495             :         { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
     496             :         { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
     497             :         { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
     498             :         { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
     499             :         { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
     500             :         { "eucjp", RTL_TEXTENCODING_EUC_JP },
     501             :         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     502             :         { "mskanji", RTL_TEXTENCODING_MS_932 },
     503             :         { "gb2312", RTL_TEXTENCODING_GB_2312 },
     504             :         { "cngb", RTL_TEXTENCODING_GB_2312 },
     505             :         { "big5", RTL_TEXTENCODING_BIG5 },
     506             :         { "cnbig5", RTL_TEXTENCODING_BIG5 },
     507             :         { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
     508             :         { "euckr", RTL_TEXTENCODING_EUC_KR },
     509             :         { "koi8r", RTL_TEXTENCODING_KOI8_R },
     510             :         { "windows1252", RTL_TEXTENCODING_MS_1252 },
     511             :         { "windows1250", RTL_TEXTENCODING_MS_1250 },
     512             :         { "windows1251", RTL_TEXTENCODING_MS_1251 },
     513             :         { "windows1253", RTL_TEXTENCODING_MS_1253 },
     514             :         { "windows1254", RTL_TEXTENCODING_MS_1254 },
     515             :         { "windows1255", RTL_TEXTENCODING_MS_1255 },
     516             :         { "windows1256", RTL_TEXTENCODING_MS_1256 },
     517             :         { "windows1257", RTL_TEXTENCODING_MS_1257 },
     518             :         { "windows1258", RTL_TEXTENCODING_MS_1258 },
     519             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     520             :     };
     521             : 
     522             :     /* All Identifiers are in lower case and contain only alphanumeric */
     523             :     /* characters. The function search for the first matching string in */
     524             :     /* the table. */
     525             :     /* Sort order: unique (first iso885914, than iso88591), important */
     526             :     static ImplStrCharsetDef const aMimeCharsetTab[] =
     527             :     {
     528             :         { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
     529             :         { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
     530             :         { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
     531             :         { "iso88591win", RTL_TEXTENCODING_MS_1252 },
     532             :         { "iso88592win", RTL_TEXTENCODING_MS_1250 },
     533             :         { "iso88599win", RTL_TEXTENCODING_MS_1254 },
     534             :         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
     535             :         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
     536             :         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
     537             :         { "iso885911", RTL_TEXTENCODING_TIS_620 },
     538             :             /* This is no official MIME character set name, but it might be in
     539             :                use in Thailand. */
     540             :         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
     541             :         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
     542             :         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
     543             :         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
     544             :         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
     545             :         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
     546             :         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
     547             :         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
     548             :         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
     549             :         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
     550             :         { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
     551             :         { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
     552             :         { "l1", RTL_TEXTENCODING_ISO_8859_1 },
     553             :         { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
     554             :         { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
     555             :         { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
     556             :         { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
     557             :         { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
     558             :         { "l2", RTL_TEXTENCODING_ISO_8859_2 },
     559             :         { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
     560             :         { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
     561             :         { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
     562             :         { "l3", RTL_TEXTENCODING_ISO_8859_3 },
     563             :         { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
     564             :         { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
     565             :         { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
     566             :         { "l4", RTL_TEXTENCODING_ISO_8859_4 },
     567             :         { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
     568             :         { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
     569             :         { "cyrillicasian", RTL_TEXTENCODING_PT154 },
     570             :         { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
     571             :         { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
     572             :         { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
     573             :         { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
     574             :         { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
     575             :         { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
     576             :         { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
     577             :         { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
     578             :         { "greek", RTL_TEXTENCODING_ISO_8859_7 },
     579             :         { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
     580             :         { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
     581             :         { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
     582             :         { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
     583             :         { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
     584             :         { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
     585             :         { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
     586             :         { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
     587             :         { "l5", RTL_TEXTENCODING_ISO_8859_9 },
     588             :         { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
     589             :         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
     590             :         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
     591             :         { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
     592             :         { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
     593             :         { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
     594             :         { "iso10646us", RTL_TEXTENCODING_ASCII_US },
     595             :         { "iso646irv", RTL_TEXTENCODING_ASCII_US },
     596             :         { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
     597             :         { "ibm437", RTL_TEXTENCODING_IBM_437 },
     598             :         { "cp437", RTL_TEXTENCODING_IBM_437 },
     599             :         { "437", RTL_TEXTENCODING_IBM_437 },
     600             :         { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
     601             :         { "ansix34", RTL_TEXTENCODING_ASCII_US },
     602             :         { "ibm367", RTL_TEXTENCODING_ASCII_US },
     603             :         { "cp367", RTL_TEXTENCODING_ASCII_US },
     604             :         { "csascii", RTL_TEXTENCODING_ASCII_US },
     605             :         { "ibm775", RTL_TEXTENCODING_IBM_775 },
     606             :         { "cp775", RTL_TEXTENCODING_IBM_775 },
     607             :         { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
     608             :         { "ibm850", RTL_TEXTENCODING_IBM_850 },
     609             :         { "cp850", RTL_TEXTENCODING_IBM_850 },
     610             :         { "850", RTL_TEXTENCODING_IBM_850 },
     611             :         { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
     612             : /*        { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
     613             : /*        { "cp851", RTL_TEXTENCODING_IBM_851 }, */
     614             : /*        { "851", RTL_TEXTENCODING_IBM_851 }, */
     615             : /*        { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
     616             :         { "ibm852", RTL_TEXTENCODING_IBM_852 },
     617             :         { "cp852", RTL_TEXTENCODING_IBM_852 },
     618             :         { "852", RTL_TEXTENCODING_IBM_852 },
     619             :         { "cspcp852", RTL_TEXTENCODING_IBM_852 },
     620             :         { "ibm855", RTL_TEXTENCODING_IBM_855 },
     621             :         { "cp855", RTL_TEXTENCODING_IBM_855 },
     622             :         { "855", RTL_TEXTENCODING_IBM_855 },
     623             :         { "csibm855", RTL_TEXTENCODING_IBM_855 },
     624             :         { "ibm857", RTL_TEXTENCODING_IBM_857 },
     625             :         { "cp857", RTL_TEXTENCODING_IBM_857 },
     626             :         { "857", RTL_TEXTENCODING_IBM_857 },
     627             :         { "csibm857", RTL_TEXTENCODING_IBM_857 },
     628             :         { "ibm860", RTL_TEXTENCODING_IBM_860 },
     629             :         { "cp860", RTL_TEXTENCODING_IBM_860 },
     630             :         { "860", RTL_TEXTENCODING_IBM_860 },
     631             :         { "csibm860", RTL_TEXTENCODING_IBM_860 },
     632             :         { "ibm861", RTL_TEXTENCODING_IBM_861 },
     633             :         { "cp861", RTL_TEXTENCODING_IBM_861 },
     634             :         { "861", RTL_TEXTENCODING_IBM_861 },
     635             :         { "csis", RTL_TEXTENCODING_IBM_861 },
     636             :         { "csibm861", RTL_TEXTENCODING_IBM_861 },
     637             :         { "ibm862", RTL_TEXTENCODING_IBM_862 },
     638             :         { "cp862", RTL_TEXTENCODING_IBM_862 },
     639             :         { "862", RTL_TEXTENCODING_IBM_862 },
     640             :         { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
     641             :         { "ibm863", RTL_TEXTENCODING_IBM_863 },
     642             :         { "cp863", RTL_TEXTENCODING_IBM_863 },
     643             :         { "863", RTL_TEXTENCODING_IBM_863 },
     644             :         { "csibm863", RTL_TEXTENCODING_IBM_863 },
     645             :         { "ibm864", RTL_TEXTENCODING_IBM_864 },
     646             :         { "cp864", RTL_TEXTENCODING_IBM_864 },
     647             :         { "864", RTL_TEXTENCODING_IBM_864 },
     648             :         { "csibm864", RTL_TEXTENCODING_IBM_864 },
     649             :         { "ibm865", RTL_TEXTENCODING_IBM_865 },
     650             :         { "cp865", RTL_TEXTENCODING_IBM_865 },
     651             :         { "865", RTL_TEXTENCODING_IBM_865 },
     652             :         { "csibm865", RTL_TEXTENCODING_IBM_865 },
     653             :         { "ibm866", RTL_TEXTENCODING_IBM_866 },
     654             :         { "cp866", RTL_TEXTENCODING_IBM_866 },
     655             :         { "866", RTL_TEXTENCODING_IBM_866 },
     656             :         { "csibm866", RTL_TEXTENCODING_IBM_866 },
     657             : /*        { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
     658             : /*        { "cp868", RTL_TEXTENCODING_IBM_868 }, */
     659             : /*        { "cpar", RTL_TEXTENCODING_IBM_868 }, */
     660             : /*        { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
     661             :         { "ibm869", RTL_TEXTENCODING_IBM_869 },
     662             :         { "cp869", RTL_TEXTENCODING_IBM_869 },
     663             :         { "869", RTL_TEXTENCODING_IBM_869 },
     664             :         { "cpgr", RTL_TEXTENCODING_IBM_869 },
     665             :         { "csibm869", RTL_TEXTENCODING_IBM_869 },
     666             :         { "ibm869", RTL_TEXTENCODING_IBM_869 },
     667             :         { "cp869", RTL_TEXTENCODING_IBM_869 },
     668             :         { "869", RTL_TEXTENCODING_IBM_869 },
     669             :         { "cpgr", RTL_TEXTENCODING_IBM_869 },
     670             :         { "csibm869", RTL_TEXTENCODING_IBM_869 },
     671             :         { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
     672             :         { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
     673             :         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     674             :         { "mskanji", RTL_TEXTENCODING_MS_932 },
     675             :         { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     676             :         { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
     677             :         { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
     678             :         { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
     679             :         { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
     680             :         { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
     681             :         { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
     682             :         { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
     683             :         { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
     684             :         { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
     685             :         { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
     686             :         { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
     687             :         { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
     688             :         { "isoir6", RTL_TEXTENCODING_ASCII_US },
     689             :         { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
     690             :         { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
     691             :         { "ascii", RTL_TEXTENCODING_ASCII_US },
     692             :         { "us", RTL_TEXTENCODING_ASCII_US },
     693             :         { "gb180302000", RTL_TEXTENCODING_GB_18030 },
     694             :             /* This is no actual MIME character set name, it is only in here
     695             :                for backwards compatibility (before "GB18030" was officially
     696             :                registered with IANA, this code contained some guesses of what
     697             :                would become official names for GB18030). */
     698             :         { "gb18030", RTL_TEXTENCODING_GB_18030 },
     699             :         { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
     700             :         { "tis620", RTL_TEXTENCODING_TIS_620 },
     701             :         { "gbk", RTL_TEXTENCODING_GBK },
     702             :         { "cp936", RTL_TEXTENCODING_GBK },
     703             :         { "ms936", RTL_TEXTENCODING_GBK },
     704             :         { "windows936", RTL_TEXTENCODING_GBK },
     705             :         { "cp874", RTL_TEXTENCODING_MS_874 },
     706             :             /* This is no official MIME character set name, but it might be in
     707             :                use in Thailand. */
     708             :         { "ms874", RTL_TEXTENCODING_MS_874 },
     709             :             /* This is no official MIME character set name, but it might be in
     710             :                use in Thailand. */
     711             :         { "windows874", RTL_TEXTENCODING_MS_874 },
     712             :             /* This is no official MIME character set name, but it might be in
     713             :                use in Thailand. */
     714             :         { "koi8u", RTL_TEXTENCODING_KOI8_U },
     715             :         { "cpis", RTL_TEXTENCODING_IBM_861 },
     716             :         { "ksc56011987", RTL_TEXTENCODING_MS_949 },
     717             :         { "isoir149", RTL_TEXTENCODING_MS_949 },
     718             :         { "ksc56011989", RTL_TEXTENCODING_MS_949 },
     719             :         { "ksc5601", RTL_TEXTENCODING_MS_949 },
     720             :         { "korean", RTL_TEXTENCODING_MS_949 },
     721             :         { "csksc56011987", RTL_TEXTENCODING_MS_949 },
     722             :             /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
     723             :                this character set identifier seems to be prominently used by MS
     724             :                to stand for KS C 5601 plus MS-949 extensions */
     725             :         { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
     726             :         { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
     727             :         { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
     728             :         { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
     729             :         { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
     730             :         { "ptcp154", RTL_TEXTENCODING_PT154 },
     731             :         { "csptcp154", RTL_TEXTENCODING_PT154 },
     732             :         { "pt154", RTL_TEXTENCODING_PT154 },
     733             :         { "cp154", RTL_TEXTENCODING_PT154 },
     734             :         { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
     735             :             /* This is not an official MIME character set name, but is in use by
     736             :                various windows APIs. */
     737             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     738             :     };
     739             : 
     740        6465 :     rtl_TextEncoding            eEncoding = RTL_TEXTENCODING_DONTKNOW;
     741        6465 :     const ImplStrCharsetDef*    pData = aVIPMimeCharsetTab;
     742        6465 :     sal_uInt32                  nBufLen = strlen( pMimeCharset )+1;
     743             : 
     744             :     /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
     745        6465 :     boost::scoped_array<char> pBuf(new char[nBufLen]);
     746        6465 :     Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf.get() );
     747             : 
     748             :     /* Search for equal in the VIP table */
     749       92231 :     while ( pData->mpCharsetStr )
     750             :     {
     751       85604 :         if ( strcmp( pBuf.get(), pData->mpCharsetStr ) == 0 )
     752             :         {
     753        6303 :             eEncoding = pData->meTextEncoding;
     754        6303 :             break;
     755             :         }
     756             : 
     757       79301 :         pData++;
     758             :     }
     759             : 
     760             :     /* Search for matching in the mime table */
     761        6465 :     if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
     762             :     {
     763         162 :         pData = aMimeCharsetTab;
     764       19934 :         while ( pData->mpCharsetStr )
     765             :         {
     766       19770 :             if ( Impl_matchString( pBuf.get(), pData->mpCharsetStr ) )
     767             :             {
     768         160 :                 eEncoding = pData->meTextEncoding;
     769         160 :                 break;
     770             :             }
     771             : 
     772       19610 :             pData++;
     773             :         }
     774             :     }
     775             : 
     776        6465 :     return eEncoding;
     777             : }
     778             : 
     779             : /* ======================================================================= */
     780             : 
     781       35564 : sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
     782             : { /* Returns the mnBestWindowsCharset field of the data record looked up for eTextEncoding. */
     783       35564 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     784       35564 :     if ( pData ) /* the lookup produced a data record */
     785       34364 :         return pData->mnBestWindowsCharset;
     786             :     else
     787        1200 :         return 1; /* no record: fixed fallback of 1 (presumably the Windows DEFAULT_CHARSET id -- TODO confirm) */
     788             : }
     789             : 
     790             : /* ----------------------------------------------------------------------- */
     791             : 
     792         160 : const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding  )
     793             : { /* Returns the mpBestUnixCharset string of the data record looked up for eTextEncoding. */
     794         160 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     795         160 :     if ( pData )
     796         160 :         return (char const *) pData->mpBestUnixCharset;
     797           0 :     else if( eTextEncoding == RTL_TEXTENCODING_UNICODE ) /* only consulted when the lookup produced no record */
     798           0 :         return (char const *) "iso10646-1";
     799             :     else
     800           0 :         return 0; /* null pointer: no data record and not RTL_TEXTENCODING_UNICODE */
     801             : }
     802             : 
     803             : /* ----------------------------------------------------------------------- */
     804             : 
     805        8080 : char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
     806             :                                                              nEncoding)
     807             : { /* Returns mpBestMimeCharset only when the record's info flags include RTL_TEXTENCODING_INFO_MIME. */
     808        8080 :     ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
     809        8056 :     return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ? /* record found and MIME flag set */
     810       15034 :                p->mpBestMimeCharset : NULL; /* NULL when there is no record or the flag is clear */
     811             : }
     812             : 
     813         620 : const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
     814             : { /* Returns mpBestMimeCharset of the looked-up record; RTL_TEXTENCODING_INFO_MIME is not checked here. */
     815         620 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     816         620 :     if ( pData )
     817         620 :         return (char const *) pData->mpBestMimeCharset;
     818             :     else
     819           0 :         return 0; /* null pointer: the lookup produced no data record */
     820             : }
     821             : 
     822             : /* The following two functions are based on <http://www.sharmahd.com/tm/
     823             :    codepages.html>, <http://msdn.microsoft.com/workshop/author/dhtml/reference/
     824             :    charsets/charset4.asp>, and <http://www.iana.org/assignments/character-sets>.
     825             :  */
     826             : 
     827             : rtl_TextEncoding SAL_CALL
     828        3116 : rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
     829             : { /* Maps a numeric Windows code page to an rtl_TextEncoding via a fixed switch table. */
     830        3116 :     switch (nCodePage)
     831             :     {
     832           2 :     case 437: return RTL_TEXTENCODING_IBM_437;
     833           2 :     case 708: return RTL_TEXTENCODING_ISO_8859_6; /* same result as code page 28596 below */
     834           2 :     case 737: return RTL_TEXTENCODING_IBM_737;
     835           2 :     case 775: return RTL_TEXTENCODING_IBM_775;
     836           2 :     case 850: return RTL_TEXTENCODING_IBM_850;
     837           2 :     case 852: return RTL_TEXTENCODING_IBM_852;
     838           2 :     case 855: return RTL_TEXTENCODING_IBM_855;
     839           2 :     case 857: return RTL_TEXTENCODING_IBM_857;
     840           2 :     case 860: return RTL_TEXTENCODING_IBM_860;
     841           2 :     case 861: return RTL_TEXTENCODING_IBM_861;
     842           2 :     case 862: return RTL_TEXTENCODING_IBM_862;
     843           2 :     case 863: return RTL_TEXTENCODING_IBM_863;
     844           2 :     case 864: return RTL_TEXTENCODING_IBM_864;
     845           2 :     case 865: return RTL_TEXTENCODING_IBM_865;
     846           2 :     case 866: return RTL_TEXTENCODING_IBM_866;
     847           2 :     case 869: return RTL_TEXTENCODING_IBM_869;
     848           4 :     case 874: return RTL_TEXTENCODING_MS_874;
     849         122 :     case 932: return RTL_TEXTENCODING_MS_932;
     850          28 :     case 936: return RTL_TEXTENCODING_MS_936;
     851           2 :     case 949: return RTL_TEXTENCODING_MS_949;
     852          10 :     case 950: return RTL_TEXTENCODING_MS_950;
     853         184 :     case 1250: return RTL_TEXTENCODING_MS_1250;
     854         152 :     case 1251: return RTL_TEXTENCODING_MS_1251;
     855        1614 :     case 1252: return RTL_TEXTENCODING_MS_1252;
     856          72 :     case 1253: return RTL_TEXTENCODING_MS_1253;
     857          74 :     case 1254: return RTL_TEXTENCODING_MS_1254;
     858          52 :     case 1255: return RTL_TEXTENCODING_MS_1255;
     859          48 :     case 1256: return RTL_TEXTENCODING_MS_1256;
     860          72 :     case 1257: return RTL_TEXTENCODING_MS_1257;
     861          60 :     case 1258: return RTL_TEXTENCODING_MS_1258;
     862           2 :     case 1361: return RTL_TEXTENCODING_MS_1361;
     863           2 :     case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
     864           2 :     case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
     865           2 :     case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
     866           2 :     case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
     867           2 :     case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
     868           2 :     case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
     869           2 :     case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
     870           2 :     case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
     871           2 :     case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
     872           2 :     case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
     873           2 :     case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
     874           2 :     case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
     875           2 :     case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
     876           2 :     case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
     877           2 :     case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
     878           2 :     case 20127: return RTL_TEXTENCODING_ASCII_US;
     879           2 :     case 20866: return RTL_TEXTENCODING_KOI8_R;
     880           2 :     case 21866: return RTL_TEXTENCODING_KOI8_U;
     881           2 :     case 28591: return RTL_TEXTENCODING_ISO_8859_1;
     882           2 :     case 28592: return RTL_TEXTENCODING_ISO_8859_2;
     883           2 :     case 28593: return RTL_TEXTENCODING_ISO_8859_3;
     884           2 :     case 28594: return RTL_TEXTENCODING_ISO_8859_4;
     885           2 :     case 28595: return RTL_TEXTENCODING_ISO_8859_5;
     886           2 :     case 28596: return RTL_TEXTENCODING_ISO_8859_6;
     887           2 :     case 28597: return RTL_TEXTENCODING_ISO_8859_7;
     888           2 :     case 28598: return RTL_TEXTENCODING_ISO_8859_8;
     889           2 :     case 28599: return RTL_TEXTENCODING_ISO_8859_9;
     890           2 :     case 28605: return RTL_TEXTENCODING_ISO_8859_15;
     891           2 :     case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
     892           2 :     case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
     893           2 :     case 51932: return RTL_TEXTENCODING_EUC_JP;
     894           2 :     case 51936: return RTL_TEXTENCODING_EUC_CN;
     895           2 :     case 51949: return RTL_TEXTENCODING_EUC_KR;
     896           2 :     case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
     897           2 :     case 65000: return RTL_TEXTENCODING_UTF7;
     898         322 :     case 65001: return RTL_TEXTENCODING_UTF8;
     899         196 :     default: return RTL_TEXTENCODING_DONTKNOW; /* code page is not listed in this table */
     900             :     }
     901             : }
     902             : 
     903             : sal_uInt32 SAL_CALL
     904         318 : rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
     905             : { /* Maps an rtl_TextEncoding to a numeric Windows code page via a fixed switch table; 0 if unlisted. */
     906         318 :     switch (nEncoding)
     907             :     {
     908           2 :     case RTL_TEXTENCODING_IBM_437: return 437;
     909             :  /* case RTL_TEXTENCODING_ISO_8859_6: return 708; -- disabled: ISO_8859_6 is mapped to 28596 below and a switch cannot repeat a case label */
     910           2 :     case RTL_TEXTENCODING_IBM_737: return 737;
     911           2 :     case RTL_TEXTENCODING_IBM_775: return 775;
     912           2 :     case RTL_TEXTENCODING_IBM_850: return 850;
     913           2 :     case RTL_TEXTENCODING_IBM_852: return 852;
     914           2 :     case RTL_TEXTENCODING_IBM_855: return 855;
     915           2 :     case RTL_TEXTENCODING_IBM_857: return 857;
     916           2 :     case RTL_TEXTENCODING_IBM_860: return 860;
     917           2 :     case RTL_TEXTENCODING_IBM_861: return 861;
     918           2 :     case RTL_TEXTENCODING_IBM_862: return 862;
     919           2 :     case RTL_TEXTENCODING_IBM_863: return 863;
     920           2 :     case RTL_TEXTENCODING_IBM_864: return 864;
     921           2 :     case RTL_TEXTENCODING_IBM_865: return 865;
     922           2 :     case RTL_TEXTENCODING_IBM_866: return 866;
     923           2 :     case RTL_TEXTENCODING_IBM_869: return 869;
     924           2 :     case RTL_TEXTENCODING_MS_874: return 874;
     925           2 :     case RTL_TEXTENCODING_MS_932: return 932;
     926           2 :     case RTL_TEXTENCODING_MS_936: return 936;
     927           2 :     case RTL_TEXTENCODING_MS_949: return 949;
     928           2 :     case RTL_TEXTENCODING_MS_950: return 950;
     929           2 :     case RTL_TEXTENCODING_MS_1250: return 1250;
     930           2 :     case RTL_TEXTENCODING_MS_1251: return 1251;
     931           2 :     case RTL_TEXTENCODING_MS_1252: return 1252;
     932           2 :     case RTL_TEXTENCODING_MS_1253: return 1253;
     933           2 :     case RTL_TEXTENCODING_MS_1254: return 1254;
     934           2 :     case RTL_TEXTENCODING_MS_1255: return 1255;
     935           2 :     case RTL_TEXTENCODING_MS_1256: return 1256;
     936           2 :     case RTL_TEXTENCODING_MS_1257: return 1257;
     937           2 :     case RTL_TEXTENCODING_MS_1258: return 1258;
     938           2 :     case RTL_TEXTENCODING_MS_1361: return 1361;
     939           2 :     case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
     940           2 :     case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
     941           2 :     case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
     942           2 :     case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
     943           2 :     case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
     944           2 :     case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
     945           2 :     case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
     946           2 :     case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
     947           2 :     case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
     948           2 :     case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
     949           2 :     case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
     950           2 :     case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
     951           2 :     case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
     952           2 :     case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
     953           2 :     case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
     954           2 :     case RTL_TEXTENCODING_ASCII_US: return 20127;
     955           2 :     case RTL_TEXTENCODING_KOI8_R: return 20866;
     956           2 :     case RTL_TEXTENCODING_KOI8_U: return 21866;
     957           2 :     case RTL_TEXTENCODING_ISO_8859_1: return 28591;
     958           2 :     case RTL_TEXTENCODING_ISO_8859_2: return 28592;
     959           2 :     case RTL_TEXTENCODING_ISO_8859_3: return 28593;
     960           2 :     case RTL_TEXTENCODING_ISO_8859_4: return 28594;
     961           2 :     case RTL_TEXTENCODING_ISO_8859_5: return 28595;
     962           2 :     case RTL_TEXTENCODING_ISO_8859_6: return 28596;
     963           2 :     case RTL_TEXTENCODING_ISO_8859_7: return 28597;
     964           2 :     case RTL_TEXTENCODING_ISO_8859_8: return 28598;
     965           2 :     case RTL_TEXTENCODING_ISO_8859_9: return 28599;
     966           2 :     case RTL_TEXTENCODING_ISO_8859_15: return 28605;
     967           2 :     case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
     968           2 :     case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
     969           2 :     case RTL_TEXTENCODING_EUC_JP: return 51932;
     970           2 :     case RTL_TEXTENCODING_EUC_CN: return 51936;
     971           2 :     case RTL_TEXTENCODING_EUC_KR: return 51949;
     972           2 :     case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
     973           2 :     case RTL_TEXTENCODING_UTF7: return 65000;
     974         182 :     case RTL_TEXTENCODING_UTF8: return 65001;
     975           6 :     default: return 0; /* encoding is not listed in this table */
     976             :     }
     977             : }
     978             : 
     979             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10