LCOV - libreoffice_filtered.info - libreoffice/sal/textenc/tencinfo.cxx

LCOV - code coverage report

Current view:	top level - libreoffice/sal/textenc - tencinfo.cxx (source / functions)		Hit	Total	Coverage
Test:	libreoffice_filtered.info	Lines:	283	288	98.3 %
Date:	2012-12-27	Functions:	14	14	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "sal/config.h"
      21             : 
      22             : #include <cstddef>
      23             : #include <cstring>
      24             : 
      25             : #include "rtl/tencinfo.h"
      26             : 
      27             : #include "gettextencodingdata.hxx"
      28             : #include "tenchelp.hxx"
      29             : 
      /* Tells whether nEncoding lies in the range this file accepts as an octet
       * text encoding: strictly above RTL_TEXTENCODING_DONTKNOW, different from 9,
       * and not above RTL_TEXTENCODING_ADOBE_DINGBATS. */
      30           5 : sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
      31             : {
      32             :     return
      33             :         nEncoding > RTL_TEXTENCODING_DONTKNOW
      34             :         && nEncoding != 9 // the value 9 is RTL_TEXTENCODING_SYSTEM, which is excluded
      35           5 :         && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // upper bound of the range; always update this! (NOTE(review): presumably whenever a higher octet encoding id is added - confirm)
      36             : }
      37             : 
      38             : /* ======================================================================= */
      39             : 
      /* Copies the NUL-terminated string pName into pBuf, replacing the ASCII
       * letters A-Z (0x41..0x5A) by their lower-case forms and copying every other
       * character unchanged.  pBuf receives one character per input character plus
       * the terminating '\0'; no bound is checked, so the caller must supply at
       * least strlen(pName)+1 bytes. */
      40         110 : static void Impl_toAsciiLower( const char* pName, char* pBuf )
      41             : {
      42        1168 :     while ( *pName )
      43             :     {
      44             :         /* A-Z */
      45         948 :         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
      46          90 :             *pBuf = (*pName)+0x20;  /* adding 0x20 turns 'A'..'Z' into 'a'..'z' */
      47             :         else
      48         858 :             *pBuf = *pName;
      49             : 
      50         948 :         pBuf++;
      51         948 :         pName++;
      52             :     }
      53             : 
                           /* terminate the copy */
      54         110 :     *pBuf = '\0';
      55         110 : }
      56             : 
      57             : /* ----------------------------------------------------------------------- */
      58             : 
      /* Writes to pBuf a filtered copy of the NUL-terminated string pName: ASCII
       * letters A-Z are stored in lower case, a-z and 0-9 are stored unchanged, and
       * every other character is dropped.  The result is '\0'-terminated and never
       * longer than the input; no bound is checked, so pBuf needs room for up to
       * strlen(pName)+1 bytes. */
      59         282 : static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
      60             : {
      61        3112 :     while ( *pName )
      62             :     {
      63             :         /* A-Z */
      64        2548 :         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
      65             :         {
      66         206 :             *pBuf = (*pName)+0x20;  /* adding 0x20 turns 'A'..'Z' into 'a'..'z' */
      67         206 :             pBuf++;
      68             :         }
      69             :         /* a-z, 0-9 */
      70        2342 :         else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) ||
      71             :                   ((*pName >= 0x30) && (*pName <= 0x39)) )
      72             :         {
      73        2073 :             *pBuf = *pName;
      74        2073 :             pBuf++;
      75             :         }
      76             : 
                               /* the input always advances; the output only advanced above when a
                                  character was kept */
      77        2548 :         pName++;
      78             :     }
      79             : 
      80         282 :     *pBuf = '\0';
      81         282 : }
      82             : 
      83             : /* ----------------------------------------------------------------------- */
      84             : 
      85             : /* Prefix test: returns true when every character of pMatchStr equals the
                          character at the same position in pCompStr, i.e. when pCompStr starts
                          with pMatchStr.  pCompStr may be longer; an empty pMatchStr matches
                          any pCompStr. */
      86       11397 : static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
      87             : {
      88             :     /* Only the end of pMatchStr is tested: if pCompStr ends first, its */
      89             :     /* terminating 0 differs from the current pMatchStr character, so the */
                           /* comparison below returns false before reading past the end. */
      90       27925 :     while ( *pMatchStr )
      91             :     {
      92       16293 :         if ( *pCompStr != *pMatchStr )
      93       11162 :             return false;
      94             : 
      95        5131 :         pCompStr++;
      96        5131 :         pMatchStr++;
      97             :     }
      98             : 
      99         235 :     return true;
     100             : }
     101             : 
     102             : /* ======================================================================= */
     103             : 
     /* One row of a charset lookup table: a name (or name fragment) and the text
      * encoding reported for it.  The tables in this file end with a row whose
      * mpCharsetStr is NULL. */
     104             : struct ImplStrCharsetDef
     105             : {
                           /* string compared against the input; NULL in the terminating row */
     106             :     const char*             mpCharsetStr;
                           /* encoding returned when this row is selected */
     107             :     rtl_TextEncoding            meTextEncoding;
     108             : };
     109             : 
     /* One row of a two-level charset lookup: the first part of a charset name and
      * the table that is searched for the remainder of the name. */
     110             : struct ImplStrFirstPartCharsetDef
     111             : {
                           /* first part of the charset name; NULL in the terminating row */
     112             :     const char*             mpCharsetStr;
                           /* table used to resolve the second part of the name */
     113             :     const ImplStrCharsetDef*    mpSecondPartTab;
     114             : };
     115             : 
     116             : /* ======================================================================= */
     117             : 
     /* Fills *pEncInfo with the character-size figures and flags of eTextEncoding.
      *
      * pEncInfo->StructSize is read first and decides how many fields are written:
      * below 5 nothing is written, from 5 on MinimumCharSize, from 6 on
      * MaximumCharSize, from 7 on AverageCharSize and from 12 on Flags.
      *
      * Returns false when StructSize is below 5.  For an encoding known to
      * Impl_getTextEncodingData() every other case returns true.  For an unknown
      * encoding the fields are set to 1/1/1/0; the result is true if filling
      * stopped early at one of the StructSize checks, but false once all four
      * fields have been written.
      *
      * pEncInfo is dereferenced unconditionally and must not be NULL. */
     118      104304 : sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
     119             : {
     120             :     const ImplTextEncodingData* pData;
     121             : 
     122      104304 :     pData = Impl_getTextEncodingData( eTextEncoding );
     123      104304 :     if ( !pData )
     124             :     {
     125             :         /* HACK: for an encoding that is not implemented the fields are
     126             :            still filled with fallback values, because not all callers
                                  handle the error */
     127          91 :         if ( pEncInfo->StructSize < 5 )
     128           1 :             return false;
     129          90 :         pEncInfo->MinimumCharSize = 1;
     130             : 
     131          90 :         if ( pEncInfo->StructSize < 6 )
     132           1 :             return true;
     133          89 :         pEncInfo->MaximumCharSize = 1;
     134             : 
     135          89 :         if ( pEncInfo->StructSize < 7 )
     136           1 :             return true;
     137          88 :         pEncInfo->AverageCharSize = 1;
     138             : 
     139          88 :         if ( pEncInfo->StructSize < 12 )
     140           1 :             return true;
     141          87 :         pEncInfo->Flags = 0;
     142             : 
                               /* all fallback values written, but the encoding itself is unknown */
     143          87 :         return false;
     144             :     }
     145             : 
                           /* Known encoding: same StructSize gating, values taken from pData. */
     146      104213 :     if ( pEncInfo->StructSize < 5 )
     147           1 :         return false;
     148      104212 :     pEncInfo->MinimumCharSize = pData->mnMinCharSize;
     149             : 
     150      104212 :     if ( pEncInfo->StructSize < 6 )
     151           1 :         return true;
     152      104211 :     pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
     153             : 
     154      104211 :     if ( pEncInfo->StructSize < 7 )
     155           1 :         return true;
     156      104210 :     pEncInfo->AverageCharSize = pData->mnAveCharSize;
     157             : 
     158      104210 :     if ( pEncInfo->StructSize < 12 )
     159           1 :         return true;
     160      104209 :     pEncInfo->Flags = pData->mnInfoFlags;
     161             : 
     162      104209 :     return true;
     163             : }
     164             : 
     165             : /* ======================================================================= */
     166             : 
     /* Maps a Windows charset identifier to an rtl_TextEncoding.  The identifier
      * handled by each case is named in the comment at the end of its line.  Any
      * value not listed yields RTL_TEXTENCODING_DONTKNOW. */
     167        4251 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
     168             : {
     169             :     rtl_TextEncoding eTextEncoding;
     170             : 
                           /* every path through the switch, including default, assigns eTextEncoding */
     171        4251 :     switch ( nWinCharset )
     172             :     {
     173        2215 :         case 0:     eTextEncoding = RTL_TEXTENCODING_MS_1252; break;    /* ANSI_CHARSET */
     174         377 :         case 2:     eTextEncoding = RTL_TEXTENCODING_SYMBOL; break;     /* SYMBOL_CHARSET */
     175           1 :         case 77:    eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
     176         120 :         case 128:   eTextEncoding = RTL_TEXTENCODING_MS_932; break;     /* SHIFTJIS_CHARSET */
     177           3 :         case 129:   eTextEncoding = RTL_TEXTENCODING_MS_949; break;     /* HANGEUL_CHARSET */
     178           2 :         case 130:   eTextEncoding = RTL_TEXTENCODING_MS_1361; break;    /* JOHAB_CHARSET */
     179         109 :         case 134:   eTextEncoding = RTL_TEXTENCODING_MS_936; break;     /* GB2312_CHARSET */
     180           5 :         case 136:   eTextEncoding = RTL_TEXTENCODING_MS_950; break;     /* CHINESEBIG5_CHARSET */
     181           5 :         case 161:   eTextEncoding = RTL_TEXTENCODING_MS_1253; break;    /* GREEK_CHARSET */
     182           4 :         case 162:   eTextEncoding = RTL_TEXTENCODING_MS_1254; break;    /* TURKISH_CHARSET */
     183           1 :         case 163:   eTextEncoding = RTL_TEXTENCODING_MS_1258; break;    /* VIETNAMESE_CHARSET !!! */
     184           3 :         case 177:   eTextEncoding = RTL_TEXTENCODING_MS_1255; break;    /* HEBREW_CHARSET */
     185        1230 :         case 178:   eTextEncoding = RTL_TEXTENCODING_MS_1256; break;    /* ARABIC_CHARSET */
     186           3 :         case 186:   eTextEncoding = RTL_TEXTENCODING_MS_1257; break;    /* BALTIC_CHARSET */
     187          12 :         case 204:   eTextEncoding = RTL_TEXTENCODING_MS_1251; break;    /* RUSSIAN_CHARSET */
     188           2 :         case 222:   eTextEncoding = RTL_TEXTENCODING_MS_874; break;     /* THAI_CHARSET */
     189          42 :         case 238:   eTextEncoding = RTL_TEXTENCODING_MS_1250; break;    /* EASTEUROPE_CHARSET */
     190           0 :         case 255:   eTextEncoding = RTL_TEXTENCODING_IBM_850; break;    /* OEM_CHARSET */
     191         117 :         default:    eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
     192             :     };
     193             : 
     194        4251 :     return eTextEncoding;
     195             : }
     196             : 
     197             : /* ----------------------------------------------------------------------- */
     198             : 
     /* Maps an X font charset name of the form "registry-encoding" to an
      * rtl_TextEncoding.
      *
      * The name is copied in ASCII lower case and cut at its first '-'.  The part
      * before the '-' is prefix-matched, in table order, against
      * aUnixCharsetFirstPartTab; the first hit selects a second-part table whose
      * rows are prefix-matched, in order, against the text after the '-'.  A hit
      * yields that row's encoding; otherwise the encoding stored in the table's
      * terminating (NULL) row is used as the default for that first part.
      *
      * Returns RTL_TEXTENCODING_DONTKNOW when the name contains no '-' or when no
      * first part matches.  pUnixCharset is passed to strlen() unchecked, so it
      * must be a valid NUL-terminated string. */
     199         110 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
     200             : {
     201             :     /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
     202             :      * (Registry and Encoding) Names").
     203             :      */
     204             : 
     205             :     /* All identifiers in the tables are lower case. The function searches */
     206             :     /* for the first matching string in the tables. */
     207             :     /* Sort order: unique (first 14, then 1), important */
                           /* (matching is by prefix, so a longer name must precede its own prefix) */
     208             : 
     209             :     static ImplStrCharsetDef const aUnixCharsetISOTab[] =
     210             :     {
     211             :         { "15", RTL_TEXTENCODING_ISO_8859_15 },
     212             :         { "14", RTL_TEXTENCODING_ISO_8859_14 },
     213             :         { "13", RTL_TEXTENCODING_ISO_8859_13 },
     214             :         { "11", RTL_TEXTENCODING_TIS_620 },
     215             :         { "10", RTL_TEXTENCODING_ISO_8859_10 },
     216             :         { "1", RTL_TEXTENCODING_ISO_8859_1 },
     217             :         { "2", RTL_TEXTENCODING_ISO_8859_2 },
     218             :         { "3", RTL_TEXTENCODING_ISO_8859_3 },
     219             :         { "4", RTL_TEXTENCODING_ISO_8859_4 },
     220             :         { "5", RTL_TEXTENCODING_ISO_8859_5 },
     221             :         { "6", RTL_TEXTENCODING_ISO_8859_6 },
     222             :         { "7", RTL_TEXTENCODING_ISO_8859_7 },
     223             :         { "8", RTL_TEXTENCODING_ISO_8859_8 },
     224             :         { "9", RTL_TEXTENCODING_ISO_8859_9 },
     225             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     226             :     };
     227             : 
     228             :     static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
     229             :     {
     230             :         { "fontspecific", RTL_TEXTENCODING_SYMBOL },
     231             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     232             :     };
     233             : 
     234             :     static ImplStrCharsetDef const aUnixCharsetMSTab[] =
     235             :     {
     236             :         { "1252", RTL_TEXTENCODING_MS_1252 },
     237             :         { "1250", RTL_TEXTENCODING_MS_1250 },
     238             :         { "1251", RTL_TEXTENCODING_MS_1251 },
     239             :         { "1253", RTL_TEXTENCODING_MS_1253 },
     240             :         { "1254", RTL_TEXTENCODING_MS_1254 },
     241             :         { "1255", RTL_TEXTENCODING_MS_1255 },
     242             :         { "1256", RTL_TEXTENCODING_MS_1256 },
     243             :         { "1257", RTL_TEXTENCODING_MS_1257 },
     244             :         { "1258", RTL_TEXTENCODING_MS_1258 },
     245             :         { "932", RTL_TEXTENCODING_MS_932 },
     246             :         { "936", RTL_TEXTENCODING_MS_936 },
     247             :         { "949", RTL_TEXTENCODING_MS_949 },
     248             :         { "950", RTL_TEXTENCODING_MS_950 },
     249             :         { "1361", RTL_TEXTENCODING_MS_1361 },
     250             :         { "cp1252", RTL_TEXTENCODING_MS_1252 },
     251             :         { "cp1250", RTL_TEXTENCODING_MS_1250 },
     252             :         { "cp1251", RTL_TEXTENCODING_MS_1251 },
     253             :         { "cp1253", RTL_TEXTENCODING_MS_1253 },
     254             :         { "cp1254", RTL_TEXTENCODING_MS_1254 },
     255             :         { "cp1255", RTL_TEXTENCODING_MS_1255 },
     256             :         { "cp1256", RTL_TEXTENCODING_MS_1256 },
     257             :         { "cp1257", RTL_TEXTENCODING_MS_1257 },
     258             :         { "cp1258", RTL_TEXTENCODING_MS_1258 },
     259             :         { "cp932", RTL_TEXTENCODING_MS_932 },
     260             :         { "cp936", RTL_TEXTENCODING_MS_936 },
     261             :         { "cp949", RTL_TEXTENCODING_MS_949 },
     262             :         { "cp950", RTL_TEXTENCODING_MS_950 },
     263             :         { "cp1361", RTL_TEXTENCODING_MS_1361 },
     264             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     265             :     };
     266             : 
     267             :     static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
     268             :     {
     269             :         { "437", RTL_TEXTENCODING_IBM_437 },
     270             :         { "850", RTL_TEXTENCODING_IBM_850 },
     271             :         { "860", RTL_TEXTENCODING_IBM_860 },
     272             :         { "861", RTL_TEXTENCODING_IBM_861 },
     273             :         { "863", RTL_TEXTENCODING_IBM_863 },
     274             :         { "865", RTL_TEXTENCODING_IBM_865 },
     275             :         { "737", RTL_TEXTENCODING_IBM_737 },
     276             :         { "775", RTL_TEXTENCODING_IBM_775 },
     277             :         { "852", RTL_TEXTENCODING_IBM_852 },
     278             :         { "855", RTL_TEXTENCODING_IBM_855 },
     279             :         { "857", RTL_TEXTENCODING_IBM_857 },
     280             :         { "862", RTL_TEXTENCODING_IBM_862 },
     281             :         { "864", RTL_TEXTENCODING_IBM_864 },
     282             :         { "866", RTL_TEXTENCODING_IBM_866 },
     283             :         { "869", RTL_TEXTENCODING_IBM_869 },
     284             :         { "874", RTL_TEXTENCODING_MS_874 },
     285             :         { "1004", RTL_TEXTENCODING_MS_1252 },
     286             :         { "65400", RTL_TEXTENCODING_SYMBOL },
     287             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     288             :     };
     289             : 
     290             :     static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
     291             :     {
     292             :         { "r", RTL_TEXTENCODING_KOI8_R },
     293             :         { "u", RTL_TEXTENCODING_KOI8_U },
     294             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     295             :     };
     296             : 
                           /* The following tables hold only the terminating row, so any second */
                           /* part resolves to the encoding stored in that row. */
     297             :     static ImplStrCharsetDef aUnixCharsetJISX0208Tab[] =
     298             :     {
     299             :         { NULL, RTL_TEXTENCODING_JIS_X_0208 }
     300             :     };
     301             : 
     302             :     static ImplStrCharsetDef aUnixCharsetJISX0201Tab[] =
     303             :     {
     304             :         { NULL, RTL_TEXTENCODING_JIS_X_0201 }
     305             :     };
     306             : 
     307             :     static ImplStrCharsetDef aUnixCharsetJISX0212Tab[] =
     308             :     {
     309             :         { NULL, RTL_TEXTENCODING_JIS_X_0212 }
     310             :     };
     311             : 
     312             :     static ImplStrCharsetDef aUnixCharsetGBTab[] =
     313             :     {
     314             :         { NULL, RTL_TEXTENCODING_GB_2312 }
     315             :     };
     316             : 
     317             :     static ImplStrCharsetDef aUnixCharsetGBKTab[] =
     318             :     {
     319             :         { NULL, RTL_TEXTENCODING_GBK }
     320             :     };
     321             : 
     322             :     static ImplStrCharsetDef aUnixCharsetBIG5Tab[] =
     323             :     {
     324             :         { NULL, RTL_TEXTENCODING_BIG5 }
     325             :     };
     326             : 
     327             :     static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
     328             :     {
     329             :         { NULL, RTL_TEXTENCODING_EUC_KR }
     330             :     };
     331             : 
     332             :     static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
     333             :     {
     334             :         { NULL, RTL_TEXTENCODING_MS_1361 }
     335             :     };
     336             : 
     337             :     static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
     338             :     {
     339             :         { NULL, RTL_TEXTENCODING_UNICODE }
     340             :     };
     341             : 
     342             :     static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
     343             :     {
     344             : /* Currently every Unicode encoding is treated as Unicode */
     345             : /*        { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
     346             :         { NULL, RTL_TEXTENCODING_UNICODE }
     347             :     };
     348             : 
     349             :     static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
     350             :     {
     351             :         { NULL, RTL_TEXTENCODING_SYMBOL }
     352             :     };
     353             : 
     354             :     /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
     355             :        rev=1.1.1.1>: */
     356             :     static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
     357             :     {
     358             :         { "0", RTL_TEXTENCODING_TIS_620 },
     359             :         { "2529", RTL_TEXTENCODING_TIS_620 },
     360             :         { "2533", RTL_TEXTENCODING_TIS_620 },
     361             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     362             :     };
     363             :     static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
     364             :     {
     365             :         { "1", RTL_TEXTENCODING_TIS_620 },
     366             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     367             :     };
     368             :     static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
     369             :     {
     370             :         { "0", RTL_TEXTENCODING_TIS_620 },
     371             :         { "1", RTL_TEXTENCODING_TIS_620 },
     372             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     373             :     };
     374             : 
                           /* First-part table; searched in order, and the first row whose string */
                           /* is a prefix of the input's first part wins. */
     375             :     static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
     376             :     {
     377             :         { "iso8859", aUnixCharsetISOTab },
     378             :         { "adobe", aUnixCharsetADOBETab },
     379             :         { "ansi", aUnixCharsetMSTab },
     380             :         { "microsoft", aUnixCharsetMSTab },
     381             :         { "ibm", aUnixCharsetIBMTab },
     382             :         { "koi8", aUnixCharsetKOI8Tab },
     383             :         { "jisx0208", aUnixCharsetJISX0208Tab },
     384             :         { "jisx0208.1983", aUnixCharsetJISX0208Tab },
     385             :         { "jisx0201", aUnixCharsetJISX0201Tab },
     386             :         { "jisx0201.1976", aUnixCharsetJISX0201Tab },
     387             :         { "jisx0212", aUnixCharsetJISX0212Tab },
     388             :         { "jisx0212.1990", aUnixCharsetJISX0212Tab },
     389             :         { "gb2312", aUnixCharsetGBTab },
     390             :         { "gbk", aUnixCharsetGBKTab },
     391             :         { "big5", aUnixCharsetBIG5Tab },
     392             :         { "iso10646", aUnixCharsetISO10646Tab },
     393             : /*      { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
     394             :         { "sunolcursor", aUnixCharsetSymbolTab },
     395             :         { "sunolglyph", aUnixCharsetSymbolTab },
                               /* NOTE(review): "iso10646" already appears above and the search stops */
                               /* at the first match, so the following row is never reached. */
     396             :         { "iso10646", aUnixCharsetUNICODETab },
     397             :         { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
     398             :         { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
     399             :         { "tis620.2529", aUnixCharsetTIS6202529Tab },
     400             :         { "tis620.2533", aUnixCharsetTIS6202533Tab },
     401             :         { "tis620", aUnixCharsetTIS620Tab },
     402             : /*        { "sunudcja.1997",  },        */
     403             : /*        { "sunudcko.1997",  },        */
     404             : /*        { "sunudczh.1997",  },        */
     405             : /*        { "sunudczhtw.1997",  },      */
     406             :         { NULL, NULL }
     407             :     };
     408             : 
     409         110 :     rtl_TextEncoding    eEncoding = RTL_TEXTENCODING_DONTKNOW;
     410             :     char*           pBuf;
     411             :     char*           pTempBuf;
     412         110 :     sal_uInt32          nBufLen = strlen( pUnixCharset )+1;
     413             :     const char*     pFirstPart;
     414             :     const char*     pSecondPart;
     415             : 
     416             :     /* Allocate a buffer and store the lower-case copy of the name in it */
     417         110 :     pBuf = new char[nBufLen];
     418         110 :     Impl_toAsciiLower( pUnixCharset, pBuf );
     419             : 
     420             :     /* Find the end of the first part: the first '-' is overwritten with */
                           /* '\0' so that pFirstPart and pSecondPart are separate strings */
     421         110 :     pFirstPart = pBuf;
     422         110 :     pSecondPart = NULL;
     423         110 :     pTempBuf = pBuf;
     424         905 :     while ( *pTempBuf )
     425             :     {
     426         791 :         if ( *pTempBuf == '-' )
     427             :         {
     428         106 :             *pTempBuf = '\0';
     429         106 :             pSecondPart = pTempBuf+1;
     430         106 :             break;
     431             :         }
     432             : 
     433         685 :         pTempBuf++;
     434             :     }
     435             : 
     436             :     /* Part separator found */
     437         110 :     if ( pSecondPart )
     438             :     {
     439             :         /* Search for the part tab */
     440         106 :         const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
     441        1057 :         while ( pFirstPartData->mpCharsetStr )
     442             :         {
     443         926 :             if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
     444             :             {
     445             :                 /* Search for the charset in the second part tab */
     446          81 :                 const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
     447         674 :                 while ( pData->mpCharsetStr )
     448             :                 {
     449         586 :                     if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
     450             :                     {
     451          74 :                         eEncoding = pData->meTextEncoding;
     452          74 :                         break;
     453             :                     }
     454             : 
     455         512 :                     pData++;
     456             :                 }
     457             : 
     458             :                 /* use default encoding for first part: after a miss pData */
                                       /* points at the terminating row, whose encoding is the */
                                       /* default; after a hit it still points at the matched row, */
                                       /* so the same value is simply assigned again */
     459          81 :                 eEncoding = pData->meTextEncoding;
     460          81 :                 break;
     461             :             }
     462             : 
     463         845 :             pFirstPartData++;
     464             :         }
     465             :     }
     466             : 
     467         110 :     delete[] pBuf;
     468             : 
     469         110 :     return eEncoding;
     470             : }
     471             : 
     472             : /* ----------------------------------------------------------------------- */
     473             : 
     474         282 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
     475             : {
     476             :     /* All Identifiers are in lower case and contain only alphanumeric */
     477             :     /* characters. The function search for the first equal string in */
     478             :     /* the table. In this table are only the most used mime types. */
     479             :     /* Sort order: important */
     480             :     static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
     481             :     {
     482             :         { "usascii", RTL_TEXTENCODING_ASCII_US },
     483             :         { "utf8", RTL_TEXTENCODING_UTF8 },
     484             :         { "utf7", RTL_TEXTENCODING_UTF7 },
     485             :         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
     486             :         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
     487             :         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
     488             :         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
     489             :         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
     490             :         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
     491             :         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
     492             :         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
     493             :         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
     494             :         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
     495             :         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
     496             :         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
     497             :         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
     498             :         { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
     499             :         { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
     500             :         { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
     501             :         { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
     502             :         { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
     503             :         { "eucjp", RTL_TEXTENCODING_EUC_JP },
     504             :         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     505             :         { "mskanji", RTL_TEXTENCODING_MS_932 },
     506             :         { "gb2312", RTL_TEXTENCODING_GB_2312 },
     507             :         { "cngb", RTL_TEXTENCODING_GB_2312 },
     508             :         { "big5", RTL_TEXTENCODING_BIG5 },
     509             :         { "cnbig5", RTL_TEXTENCODING_BIG5 },
     510             :         { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
     511             :         { "euckr", RTL_TEXTENCODING_EUC_KR },
     512             :         { "koi8r", RTL_TEXTENCODING_KOI8_R },
     513             :         { "windows1252", RTL_TEXTENCODING_MS_1252 },
     514             :         { "windows1250", RTL_TEXTENCODING_MS_1250 },
     515             :         { "windows1251", RTL_TEXTENCODING_MS_1251 },
     516             :         { "windows1253", RTL_TEXTENCODING_MS_1253 },
     517             :         { "windows1254", RTL_TEXTENCODING_MS_1254 },
     518             :         { "windows1255", RTL_TEXTENCODING_MS_1255 },
     519             :         { "windows1256", RTL_TEXTENCODING_MS_1256 },
     520             :         { "windows1257", RTL_TEXTENCODING_MS_1257 },
     521             :         { "windows1258", RTL_TEXTENCODING_MS_1258 },
     522             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     523             :     };
     524             : 
     525             :     /* All Identifiers are in lower case and contain only alphanumeric */
     526             :     /* characters. The function search for the first matching string in */
     527             :     /* the table. */
     528             :     /* Sort order: unique (first iso885914, than iso88591), important */
     529             :     static ImplStrCharsetDef const aMimeCharsetTab[] =
     530             :     {
     531             :         { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
     532             :         { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
     533             :         { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
     534             :         { "iso88591win", RTL_TEXTENCODING_MS_1252 },
     535             :         { "iso88592win", RTL_TEXTENCODING_MS_1250 },
     536             :         { "iso88599win", RTL_TEXTENCODING_MS_1254 },
     537             :         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
     538             :         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
     539             :         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
     540             :         { "iso885911", RTL_TEXTENCODING_TIS_620 },
     541             :             /* This is no official MIME character set name, but it might be in
     542             :                use in Thailand. */
     543             :         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
     544             :         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
     545             :         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
     546             :         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
     547             :         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
     548             :         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
     549             :         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
     550             :         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
     551             :         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
     552             :         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
     553             :         { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
     554             :         { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
     555             :         { "l1", RTL_TEXTENCODING_ISO_8859_1 },
     556             :         { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
     557             :         { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
     558             :         { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
     559             :         { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
     560             :         { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
     561             :         { "l2", RTL_TEXTENCODING_ISO_8859_2 },
     562             :         { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
     563             :         { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
     564             :         { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
     565             :         { "l3", RTL_TEXTENCODING_ISO_8859_3 },
     566             :         { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
     567             :         { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
     568             :         { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
     569             :         { "l4", RTL_TEXTENCODING_ISO_8859_4 },
     570             :         { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
     571             :         { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
     572             :         { "cyrillicasian", RTL_TEXTENCODING_PT154 },
     573             :         { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
     574             :         { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
     575             :         { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
     576             :         { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
     577             :         { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
     578             :         { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
     579             :         { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
     580             :         { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
     581             :         { "greek", RTL_TEXTENCODING_ISO_8859_7 },
     582             :         { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
     583             :         { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
     584             :         { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
     585             :         { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
     586             :         { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
     587             :         { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
     588             :         { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
     589             :         { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
     590             :         { "l5", RTL_TEXTENCODING_ISO_8859_9 },
     591             :         { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
     592             :         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
     593             :         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
     594             :         { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
     595             :         { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
     596             :         { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
     597             :         { "iso10646us", RTL_TEXTENCODING_ASCII_US },
     598             :         { "iso646irv", RTL_TEXTENCODING_ASCII_US },
     599             :         { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
     600             :         { "ibm437", RTL_TEXTENCODING_IBM_437 },
     601             :         { "cp437", RTL_TEXTENCODING_IBM_437 },
     602             :         { "437", RTL_TEXTENCODING_IBM_437 },
     603             :         { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
     604             :         { "ansix34", RTL_TEXTENCODING_ASCII_US },
     605             :         { "ibm367", RTL_TEXTENCODING_ASCII_US },
     606             :         { "cp367", RTL_TEXTENCODING_ASCII_US },
     607             :         { "csascii", RTL_TEXTENCODING_ASCII_US },
     608             :         { "ibm775", RTL_TEXTENCODING_IBM_775 },
     609             :         { "cp775", RTL_TEXTENCODING_IBM_775 },
     610             :         { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
     611             :         { "ibm850", RTL_TEXTENCODING_IBM_850 },
     612             :         { "cp850", RTL_TEXTENCODING_IBM_850 },
     613             :         { "850", RTL_TEXTENCODING_IBM_850 },
     614             :         { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
     615             : /*        { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
     616             : /*        { "cp851", RTL_TEXTENCODING_IBM_851 }, */
     617             : /*        { "851", RTL_TEXTENCODING_IBM_851 }, */
     618             : /*        { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
     619             :         { "ibm852", RTL_TEXTENCODING_IBM_852 },
     620             :         { "cp852", RTL_TEXTENCODING_IBM_852 },
     621             :         { "852", RTL_TEXTENCODING_IBM_852 },
     622             :         { "cspcp852", RTL_TEXTENCODING_IBM_852 },
     623             :         { "ibm855", RTL_TEXTENCODING_IBM_855 },
     624             :         { "cp855", RTL_TEXTENCODING_IBM_855 },
     625             :         { "855", RTL_TEXTENCODING_IBM_855 },
     626             :         { "csibm855", RTL_TEXTENCODING_IBM_855 },
     627             :         { "ibm857", RTL_TEXTENCODING_IBM_857 },
     628             :         { "cp857", RTL_TEXTENCODING_IBM_857 },
     629             :         { "857", RTL_TEXTENCODING_IBM_857 },
     630             :         { "csibm857", RTL_TEXTENCODING_IBM_857 },
     631             :         { "ibm860", RTL_TEXTENCODING_IBM_860 },
     632             :         { "cp860", RTL_TEXTENCODING_IBM_860 },
     633             :         { "860", RTL_TEXTENCODING_IBM_860 },
     634             :         { "csibm860", RTL_TEXTENCODING_IBM_860 },
     635             :         { "ibm861", RTL_TEXTENCODING_IBM_861 },
     636             :         { "cp861", RTL_TEXTENCODING_IBM_861 },
     637             :         { "861", RTL_TEXTENCODING_IBM_861 },
     638             :         { "csis", RTL_TEXTENCODING_IBM_861 },
     639             :         { "csibm861", RTL_TEXTENCODING_IBM_861 },
     640             :         { "ibm862", RTL_TEXTENCODING_IBM_862 },
     641             :         { "cp862", RTL_TEXTENCODING_IBM_862 },
     642             :         { "862", RTL_TEXTENCODING_IBM_862 },
     643             :         { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
     644             :         { "ibm863", RTL_TEXTENCODING_IBM_863 },
     645             :         { "cp863", RTL_TEXTENCODING_IBM_863 },
     646             :         { "863", RTL_TEXTENCODING_IBM_863 },
     647             :         { "csibm863", RTL_TEXTENCODING_IBM_863 },
     648             :         { "ibm864", RTL_TEXTENCODING_IBM_864 },
     649             :         { "cp864", RTL_TEXTENCODING_IBM_864 },
     650             :         { "864", RTL_TEXTENCODING_IBM_864 },
     651             :         { "csibm864", RTL_TEXTENCODING_IBM_864 },
     652             :         { "ibm865", RTL_TEXTENCODING_IBM_865 },
     653             :         { "cp865", RTL_TEXTENCODING_IBM_865 },
     654             :         { "865", RTL_TEXTENCODING_IBM_865 },
     655             :         { "csibm865", RTL_TEXTENCODING_IBM_865 },
     656             :         { "ibm866", RTL_TEXTENCODING_IBM_866 },
     657             :         { "cp866", RTL_TEXTENCODING_IBM_866 },
     658             :         { "866", RTL_TEXTENCODING_IBM_866 },
     659             :         { "csibm866", RTL_TEXTENCODING_IBM_866 },
     660             : /*        { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
     661             : /*        { "cp868", RTL_TEXTENCODING_IBM_868 }, */
     662             : /*        { "cpar", RTL_TEXTENCODING_IBM_868 }, */
     663             : /*        { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
     664             :         { "ibm869", RTL_TEXTENCODING_IBM_869 },
     665             :         { "cp869", RTL_TEXTENCODING_IBM_869 },
     666             :         { "869", RTL_TEXTENCODING_IBM_869 },
     667             :         { "cpgr", RTL_TEXTENCODING_IBM_869 },
     668             :         { "csibm869", RTL_TEXTENCODING_IBM_869 },
     669             :         { "ibm869", RTL_TEXTENCODING_IBM_869 },
     670             :         { "cp869", RTL_TEXTENCODING_IBM_869 },
     671             :         { "869", RTL_TEXTENCODING_IBM_869 },
     672             :         { "cpgr", RTL_TEXTENCODING_IBM_869 },
     673             :         { "csibm869", RTL_TEXTENCODING_IBM_869 },
     674             :         { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
     675             :         { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
     676             :         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     677             :         { "mskanji", RTL_TEXTENCODING_MS_932 },
     678             :         { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     679             :         { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
     680             :         { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
     681             :         { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
     682             :         { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
     683             :         { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
     684             :         { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
     685             :         { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
     686             :         { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
     687             :         { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
     688             :         { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
     689             :         { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
     690             :         { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
     691             :         { "isoir6", RTL_TEXTENCODING_ASCII_US },
     692             :         { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
     693             :         { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
     694             :         { "ascii", RTL_TEXTENCODING_ASCII_US },
     695             :         { "us", RTL_TEXTENCODING_ASCII_US },
     696             :         { "gb180302000", RTL_TEXTENCODING_GB_18030 },
     697             :             /* This is no actual MIME character set name, it is only in here
     698             :                for backwards compatibility (before "GB18030" was officially
     699             :                registered with IANA, this code contained some guesses of what
     700             :                would become official names for GB18030). */
     701             :         { "gb18030", RTL_TEXTENCODING_GB_18030 },
     702             :         { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
     703             :         { "tis620", RTL_TEXTENCODING_TIS_620 },
     704             :         { "gbk", RTL_TEXTENCODING_GBK },
     705             :         { "cp936", RTL_TEXTENCODING_GBK },
     706             :         { "ms936", RTL_TEXTENCODING_GBK },
     707             :         { "windows936", RTL_TEXTENCODING_GBK },
     708             :         { "cp874", RTL_TEXTENCODING_MS_874 },
     709             :             /* This is no official MIME character set name, but it might be in
     710             :                use in Thailand. */
     711             :         { "ms874", RTL_TEXTENCODING_MS_874 },
     712             :             /* This is no official MIME character set name, but it might be in
     713             :                use in Thailand. */
     714             :         { "windows874", RTL_TEXTENCODING_MS_874 },
     715             :             /* This is no official MIME character set name, but it might be in
     716             :                use in Thailand. */
     717             :         { "koi8u", RTL_TEXTENCODING_KOI8_U },
     718             :         { "cpis", RTL_TEXTENCODING_IBM_861 },
     719             :         { "ksc56011987", RTL_TEXTENCODING_MS_949 },
     720             :         { "isoir149", RTL_TEXTENCODING_MS_949 },
     721             :         { "ksc56011989", RTL_TEXTENCODING_MS_949 },
     722             :         { "ksc5601", RTL_TEXTENCODING_MS_949 },
     723             :         { "korean", RTL_TEXTENCODING_MS_949 },
     724             :         { "csksc56011987", RTL_TEXTENCODING_MS_949 },
     725             :             /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
     726             :                this character set identifier seems to be prominently used by MS
     727             :                to stand for KS C 5601 plus MS-949 extensions */
     728             :         { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
     729             :         { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
     730             :         { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
     731             :         { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
     732             :         { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
     733             :         { "ptcp154", RTL_TEXTENCODING_PT154 },
     734             :         { "csptcp154", RTL_TEXTENCODING_PT154 },
     735             :         { "pt154", RTL_TEXTENCODING_PT154 },
     736             :         { "cp154", RTL_TEXTENCODING_PT154 },
     737             :         { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
     738             :             /* This is not an official MIME character set name, but is in use by
     739             :                various windows APIs. */
     740             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     741             :     };
     742             : 
     743         282 :     rtl_TextEncoding            eEncoding = RTL_TEXTENCODING_DONTKNOW;
     744             :     char*                   pBuf;
     745         282 :     const ImplStrCharsetDef*    pData = aVIPMimeCharsetTab;
     746         282 :     sal_uInt32                  nBufLen = strlen( pMimeCharset )+1;
     747             : 
     748             :     /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
     749         282 :     pBuf = new char[nBufLen];
     750         282 :     Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf );
     751             : 
     752             :     /* Search for equal in the VIP table */
     753        7777 :     while ( pData->mpCharsetStr )
     754             :     {
     755        7414 :         if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 )
     756             :         {
     757         201 :             eEncoding = pData->meTextEncoding;
     758         201 :             break;
     759             :         }
     760             : 
     761        7213 :         pData++;
     762             :     }
     763             : 
     764             :     /* Search for matching in the mime table */
     765         282 :     if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
     766             :     {
     767          81 :         pData = aMimeCharsetTab;
     768        9967 :         while ( pData->mpCharsetStr )
     769             :         {
     770        9885 :             if ( Impl_matchString( pBuf, pData->mpCharsetStr ) )
     771             :             {
     772          80 :                 eEncoding = pData->meTextEncoding;
     773          80 :                 break;
     774             :             }
     775             : 
     776        9805 :             pData++;
     777             :         }
     778             :     }
     779             : 
     780         282 :     delete[] pBuf;
     781             : 
     782         282 :     return eEncoding;
     783             : }
     784             : 
     785             : /* ======================================================================= */
     786             : /* Returns the Windows charset value recorded for eTextEncoding.
                          The value is read from the mnBestWindowsCharset field of the
                          record that Impl_getTextEncodingData() returns for the encoding.
                          If no record is available, the constant 1 is returned instead. */
     787         529 : sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
     788             : {
     789         529 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     790         529 :     if ( pData )
     791         290 :         return pData->mnBestWindowsCharset;
     792             :     else
     793         239 :         return 1; /* NOTE(review): presumably the Windows DEFAULT_CHARSET value - confirm */
     794             : }
     795             : 
     796             : /* ----------------------------------------------------------------------- */
     797             : /* Returns the Unix charset name recorded for eTextEncoding.
                          The name is the mpBestUnixCharset field of the record that
                          Impl_getTextEncodingData() returns for the encoding.
                          If no record is available, RTL_TEXTENCODING_UNICODE is answered
                          with the fixed string "iso10646-1"; any other encoding yields a
                          null pointer, so callers must check the result before using it. */
     798          80 : const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding  )
     799             : {
     800          80 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     801          80 :     if ( pData )
     802          80 :         return (char const *) pData->mpBestUnixCharset;
     803           0 :     else if( eTextEncoding == RTL_TEXTENCODING_UNICODE ) /* special case handled without a data record */
     804           0 :         return (char const *) "iso10646-1";
     805             :     else
     806           0 :         return 0;
     807             : }
     808             : 
     809             : /* ----------------------------------------------------------------------- */
     810             : /* Returns the MIME charset name recorded for nEncoding, but only when
                          the encoding's record has the RTL_TEXTENCODING_INFO_MIME bit set in
                          mnInfoFlags.
                          Returns NULL when Impl_getTextEncodingData() yields no record or
                          when that flag is not set. */
     811         184 : char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
     812             :                                                              nEncoding)
     813             : {
     814         184 :     ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
     815             :     return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
     816         184 :                p->mpBestMimeCharset : NULL;
     817             : }
     818             : /* Returns the mpBestMimeCharset field of the record that
                          Impl_getTextEncodingData() returns for eTextEncoding.
                          No info flags are consulted here: the name is returned whenever a
                          record exists. Returns a null pointer when there is no record. */
     819         101 : const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
     820             : {
     821         101 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     822         101 :     if ( pData )
     823         101 :         return (char const *) pData->mpBestMimeCharset;
     824             :     else
     825           0 :         return 0;
     826             : }
     827             : 
     828             : /* The following two functions are based on <http://www.sharmahd.com/tm/
     829             :    codepages.html>, <http://msdn.microsoft.com/workshop/author/dhtml/reference/
     830             :    charsets/charset4.asp>, and <http://www.iana.org/assignments/character-sets>.
     831             :  */
     832             : /* Maps a numeric Windows code page to the corresponding
                          rtl_TextEncoding constant through a fixed switch table.
                          Code pages 708 and 28596 both map to RTL_TEXTENCODING_ISO_8859_6.
                          Any code page without a case label yields
                          RTL_TEXTENCODING_DONTKNOW. */
     833             : rtl_TextEncoding SAL_CALL
     834         417 : rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
     835             : {
     836         417 :     switch (nCodePage)
     837             :     {
     838           1 :     case 437: return RTL_TEXTENCODING_IBM_437;
     839           1 :     case 708: return RTL_TEXTENCODING_ISO_8859_6;
     840           1 :     case 737: return RTL_TEXTENCODING_IBM_737;
     841           1 :     case 775: return RTL_TEXTENCODING_IBM_775;
     842           1 :     case 850: return RTL_TEXTENCODING_IBM_850;
     843           1 :     case 852: return RTL_TEXTENCODING_IBM_852;
     844           1 :     case 855: return RTL_TEXTENCODING_IBM_855;
     845           1 :     case 857: return RTL_TEXTENCODING_IBM_857;
     846           1 :     case 860: return RTL_TEXTENCODING_IBM_860;
     847           1 :     case 861: return RTL_TEXTENCODING_IBM_861;
     848           1 :     case 862: return RTL_TEXTENCODING_IBM_862;
     849           1 :     case 863: return RTL_TEXTENCODING_IBM_863;
     850           1 :     case 864: return RTL_TEXTENCODING_IBM_864;
     851           1 :     case 865: return RTL_TEXTENCODING_IBM_865;
     852           1 :     case 866: return RTL_TEXTENCODING_IBM_866;
     853           1 :     case 869: return RTL_TEXTENCODING_IBM_869;
     854           2 :     case 874: return RTL_TEXTENCODING_MS_874;
     855          23 :     case 932: return RTL_TEXTENCODING_MS_932;
     856           9 :     case 936: return RTL_TEXTENCODING_MS_936;
     857           1 :     case 949: return RTL_TEXTENCODING_MS_949;
     858           4 :     case 950: return RTL_TEXTENCODING_MS_950;
     859          29 :     case 1250: return RTL_TEXTENCODING_MS_1250;
     860          33 :     case 1251: return RTL_TEXTENCODING_MS_1251;
     861         130 :     case 1252: return RTL_TEXTENCODING_MS_1252;
     862          19 :     case 1253: return RTL_TEXTENCODING_MS_1253;
     863          19 :     case 1254: return RTL_TEXTENCODING_MS_1254;
     864          15 :     case 1255: return RTL_TEXTENCODING_MS_1255;
     865          13 :     case 1256: return RTL_TEXTENCODING_MS_1256;
     866          19 :     case 1257: return RTL_TEXTENCODING_MS_1257;
     867          15 :     case 1258: return RTL_TEXTENCODING_MS_1258;
     868           1 :     case 1361: return RTL_TEXTENCODING_MS_1361;
     869           1 :     case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
     870           1 :     case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
     871           1 :     case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
     872           1 :     case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
     873           1 :     case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
     874           1 :     case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
     875           1 :     case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
     876           1 :     case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
     877           1 :     case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
     878           1 :     case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
     879           1 :     case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
     880           1 :     case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
     881           1 :     case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
     882           1 :     case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
     883           1 :     case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
     884           1 :     case 20127: return RTL_TEXTENCODING_ASCII_US;
     885           1 :     case 20866: return RTL_TEXTENCODING_KOI8_R;
     886           1 :     case 21866: return RTL_TEXTENCODING_KOI8_U;
     887           1 :     case 28591: return RTL_TEXTENCODING_ISO_8859_1;
     888           1 :     case 28592: return RTL_TEXTENCODING_ISO_8859_2;
     889           1 :     case 28593: return RTL_TEXTENCODING_ISO_8859_3;
     890           1 :     case 28594: return RTL_TEXTENCODING_ISO_8859_4;
     891           1 :     case 28595: return RTL_TEXTENCODING_ISO_8859_5;
     892           1 :     case 28596: return RTL_TEXTENCODING_ISO_8859_6;
     893           1 :     case 28597: return RTL_TEXTENCODING_ISO_8859_7;
     894           1 :     case 28598: return RTL_TEXTENCODING_ISO_8859_8;
     895           1 :     case 28599: return RTL_TEXTENCODING_ISO_8859_9;
     896           1 :     case 28605: return RTL_TEXTENCODING_ISO_8859_15;
     897           1 :     case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
     898           1 :     case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
     899           1 :     case 51932: return RTL_TEXTENCODING_EUC_JP;
     900           1 :     case 51936: return RTL_TEXTENCODING_EUC_CN;
     901           1 :     case 51949: return RTL_TEXTENCODING_EUC_KR;
     902           1 :     case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
     903           1 :     case 65000: return RTL_TEXTENCODING_UTF7;
     904          30 :     case 65001: return RTL_TEXTENCODING_UTF8;
     905           4 :     default: return RTL_TEXTENCODING_DONTKNOW;
     906             :     }
     907             : }
     908             : /* Maps an rtl_TextEncoding constant to a numeric Windows code page
                          through a fixed switch table.
                          RTL_TEXTENCODING_ISO_8859_6 is answered with 28596; the alternative
                          mapping to 708 is left commented out below.
                          Any encoding without a case label yields 0. */
     909             : sal_uInt32 SAL_CALL
     910          69 : rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
     911             : {
     912          69 :     switch (nEncoding)
     913             :     {
     914           1 :     case RTL_TEXTENCODING_IBM_437: return 437;
     915             :  /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
     916           1 :     case RTL_TEXTENCODING_IBM_737: return 737;
     917           1 :     case RTL_TEXTENCODING_IBM_775: return 775;
     918           1 :     case RTL_TEXTENCODING_IBM_850: return 850;
     919           1 :     case RTL_TEXTENCODING_IBM_852: return 852;
     920           1 :     case RTL_TEXTENCODING_IBM_855: return 855;
     921           1 :     case RTL_TEXTENCODING_IBM_857: return 857;
     922           1 :     case RTL_TEXTENCODING_IBM_860: return 860;
     923           1 :     case RTL_TEXTENCODING_IBM_861: return 861;
     924           1 :     case RTL_TEXTENCODING_IBM_862: return 862;
     925           1 :     case RTL_TEXTENCODING_IBM_863: return 863;
     926           1 :     case RTL_TEXTENCODING_IBM_864: return 864;
     927           1 :     case RTL_TEXTENCODING_IBM_865: return 865;
     928           1 :     case RTL_TEXTENCODING_IBM_866: return 866;
     929           1 :     case RTL_TEXTENCODING_IBM_869: return 869;
     930           1 :     case RTL_TEXTENCODING_MS_874: return 874;
     931           1 :     case RTL_TEXTENCODING_MS_932: return 932;
     932           1 :     case RTL_TEXTENCODING_MS_936: return 936;
     933           1 :     case RTL_TEXTENCODING_MS_949: return 949;
     934           1 :     case RTL_TEXTENCODING_MS_950: return 950;
     935           1 :     case RTL_TEXTENCODING_MS_1250: return 1250;
     936           1 :     case RTL_TEXTENCODING_MS_1251: return 1251;
     937           1 :     case RTL_TEXTENCODING_MS_1252: return 1252;
     938           1 :     case RTL_TEXTENCODING_MS_1253: return 1253;
     939           1 :     case RTL_TEXTENCODING_MS_1254: return 1254;
     940           1 :     case RTL_TEXTENCODING_MS_1255: return 1255;
     941           1 :     case RTL_TEXTENCODING_MS_1256: return 1256;
     942           1 :     case RTL_TEXTENCODING_MS_1257: return 1257;
     943           1 :     case RTL_TEXTENCODING_MS_1258: return 1258;
     944           1 :     case RTL_TEXTENCODING_MS_1361: return 1361;
     945           1 :     case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
     946           1 :     case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
     947           1 :     case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
     948           1 :     case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
     949           1 :     case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
     950           1 :     case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
     951           1 :     case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
     952           1 :     case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
     953           1 :     case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
     954           1 :     case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
     955           1 :     case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
     956           1 :     case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
     957           1 :     case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
     958           1 :     case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
     959           1 :     case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
     960           1 :     case RTL_TEXTENCODING_ASCII_US: return 20127;
     961           1 :     case RTL_TEXTENCODING_KOI8_R: return 20866;
     962           1 :     case RTL_TEXTENCODING_KOI8_U: return 21866;
     963           1 :     case RTL_TEXTENCODING_ISO_8859_1: return 28591;
     964           1 :     case RTL_TEXTENCODING_ISO_8859_2: return 28592;
     965           1 :     case RTL_TEXTENCODING_ISO_8859_3: return 28593;
     966           1 :     case RTL_TEXTENCODING_ISO_8859_4: return 28594;
     967           1 :     case RTL_TEXTENCODING_ISO_8859_5: return 28595;
     968           1 :     case RTL_TEXTENCODING_ISO_8859_6: return 28596;
     969           1 :     case RTL_TEXTENCODING_ISO_8859_7: return 28597;
     970           1 :     case RTL_TEXTENCODING_ISO_8859_8: return 28598;
     971           1 :     case RTL_TEXTENCODING_ISO_8859_9: return 28599;
     972           1 :     case RTL_TEXTENCODING_ISO_8859_15: return 28605;
     973           1 :     case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
     974           1 :     case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
     975           1 :     case RTL_TEXTENCODING_EUC_JP: return 51932;
     976           1 :     case RTL_TEXTENCODING_EUC_CN: return 51936;
     977           1 :     case RTL_TEXTENCODING_EUC_KR: return 51949;
     978           1 :     case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
     979           1 :     case RTL_TEXTENCODING_UTF7: return 65000;
     980           1 :     case RTL_TEXTENCODING_UTF8: return 65001;
     981           3 :     default: return 0;
     982             :     }
     983             : }
     984             : 
     985             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10