LCOV - code coverage report
Current view: top level - sal/textenc - tencinfo.cxx (source / functions) Hit Total Coverage
Test: commit e02a6cb2c3e2b23b203b422e4e0680877f232636 Lines: 12 290 4.1 %
Date: 2014-04-14 Functions: 1 14 7.1 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "sal/config.h"
      21             : 
      22             : #include <cstddef>
      23             : #include <cstring>
      24             : 
      25             : #include "rtl/tencinfo.h"
      26             : 
      27             : #include "gettextencodingdata.hxx"
      28             : #include "tenchelp.hxx"
      29             : /*
                        * Tells whether nEncoding falls in the id range this function accepts as
                        * an octet text encoding: strictly above RTL_TEXTENCODING_DONTKNOW, not
                        * equal to 9, and at most RTL_TEXTENCODING_ADOBE_DINGBATS.
                        *
                        * Returns true only when all three conditions hold.
                        */
      30           0 : sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
      31             : {
      32             :     return
      33             :         nEncoding > RTL_TEXTENCODING_DONTKNOW
      34           0 :         && nEncoding != 9 // literal standing in for RTL_TEXTENCODING_SYSTEM
      35           0 :         && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // upper bound of the range - always update this!
      36             : }
      37             : 
      38             : /* ======================================================================= */
      39             : /*
                        * Copies the NUL-terminated string pName to pBuf, adding 0x20 to every
                        * byte in the range 0x41-0x5A (ASCII A-Z) and copying all other bytes
                        * unchanged, then writes the terminating NUL.
                        *
                        * pBuf receives exactly as many characters as pName holds plus the
                        * terminator; no bounds check is made here, so the caller has to supply
                        * a buffer of at least that size.
                        */
      40           0 : static void Impl_toAsciiLower( const char* pName, char* pBuf )
      41             : {
      42           0 :     while ( *pName )
      43             :     {
      44             :         /* ASCII upper-case letter, 0x41-0x5A (A-Z) */
      45           0 :         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
      46           0 :             *pBuf = (*pName)+0x20;  /* shift into 0x61-0x7A (a-z) */
      47             :         else
      48           0 :             *pBuf = *pName;
      49             : 
      50           0 :         pBuf++;
      51           0 :         pName++;
      52             :     }
      53             : 
      54           0 :     *pBuf = '\0';
      55           0 : }
      56             : 
      57             : /* ----------------------------------------------------------------------- */
      58             : /*
                        * Copies pName to pBuf while keeping only ASCII letters and digits.
                        * Bytes in 0x41-0x5A (A-Z) are written with 0x20 added, bytes in
                        * 0x61-0x7A (a-z) and 0x30-0x39 (0-9) are written unchanged, and every
                        * other byte is skipped. The result is NUL-terminated.
                        *
                        * The output is never longer than the input, but no bounds check is
                        * made here; the caller supplies the buffer.
                        */
      59           0 : static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
      60             : {
      61           0 :     while ( *pName )
      62             :     {
      63             :         /* ASCII upper-case letter, 0x41-0x5A (A-Z) */
      64           0 :         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
      65             :         {
      66           0 :             *pBuf = (*pName)+0x20;  /* shift into 0x61-0x7A (a-z) */
      67           0 :             pBuf++;
      68             :         }
      69             :         /* already lower-case letter (a-z) or digit (0-9): keep as is */
      70           0 :         else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) ||
      71           0 :                   ((*pName >= 0x30) && (*pName <= 0x39)) )
      72             :         {
      73           0 :             *pBuf = *pName;
      74           0 :             pBuf++;
      75             :         }
      76             : 
      77           0 :         pName++;    /* anything else is dropped: pBuf is not advanced */
      78             :     }
      79             : 
      80           0 :     *pBuf = '\0';
      81           0 : }
      82             : 
      83             : /* ----------------------------------------------------------------------- */
      84             : 
      85             : /*
                        * Prefix test. Returns true when every character of pMatchStr equals the
                        * character at the same position in pCompStr; pCompStr may continue past
                        * that point. An empty pMatchStr therefore always yields true.
                        */
      86           0 : static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
      87             : {
      88             :     /* Only the end of pMatchStr is tested: when pCompStr is shorter, its */
      89             :     /* terminating 0 mismatches first and the loop returns false. */
      90           0 :     while ( *pMatchStr )
      91             :     {
      92           0 :         if ( *pCompStr != *pMatchStr )
      93           0 :             return false;
      94             : 
      95           0 :         pCompStr++;
      96           0 :         pMatchStr++;
      97             :     }
      98             : 
      99           0 :     return true;
     100             : }
     101             : 
     102             : /* ======================================================================= */
     103             : /*
                        * One row of a charset lookup table: a name string and the text encoding
                        * it selects. The tables in this file end with a row whose mpCharsetStr
                        * is NULL.
                        */
     104             : struct ImplStrCharsetDef
     105             : {
     106             :     const char*             mpCharsetStr;       /* name to compare against; NULL in the last row */
     107             :     rtl_TextEncoding            meTextEncoding; /* encoding reported for this row */
     108             : };
     109             : /*
                        * One row of a first-level lookup table: a name string for the first part
                        * of a charset name and the table to search with the second part.
                        */
     110             : struct ImplStrFirstPartCharsetDef
     111             : {
     112             :     const char*             mpCharsetStr;       /* first-part name; NULL in the last row */
     113             :     const ImplStrCharsetDef*    mpSecondPartTab; /* table searched once this row is chosen */
     114             : };
     115             : 
     116             : /* ======================================================================= */
     117             : /*
                        * Fills *pEncInfo with the character-size data and flags of eTextEncoding.
                        *
                        * pEncInfo->StructSize is read first and decides how many fields are
                        * written: MinimumCharSize needs StructSize >= 5, MaximumCharSize >= 6,
                        * AverageCharSize >= 7 and Flags >= 12. pEncInfo is dereferenced without
                        * a null check.
                        *
                        * When Impl_getTextEncodingData returns data, the result is false for
                        * StructSize < 5 and true otherwise.
                        *
                        * When it returns no data, the fields that fit are set to the fallback
                        * values 1, 1, 1 and 0. The result is then false for StructSize < 5, true
                        * when the function stops early because StructSize < 12, and false once
                        * all four fields have been written.
                        * NOTE(review): true for a partial fallback but false for a complete one
                        * looks inconsistent - confirm that callers rely on this.
                        */
     118      255037 : sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
     119             : {
     120             :     const ImplTextEncodingData* pData;
     121             : 
     122      255037 :     pData = Impl_getTextEncodingData( eTextEncoding );
     123      255037 :     if ( !pData )
     124             :     {
     125             :         /* HACK: no data for this encoding. Fill in fallback sizes anyway,
     126             :            because not all callers handle the error return */
     127           0 :         if ( pEncInfo->StructSize < 5 )
     128           0 :             return false;
     129           0 :         pEncInfo->MinimumCharSize = 1;
     130             : 
     131           0 :         if ( pEncInfo->StructSize < 6 )
     132           0 :             return true;
     133           0 :         pEncInfo->MaximumCharSize = 1;
     134             : 
     135           0 :         if ( pEncInfo->StructSize < 7 )
     136           0 :             return true;
     137           0 :         pEncInfo->AverageCharSize = 1;
     138             : 
     139           0 :         if ( pEncInfo->StructSize < 12 )
     140           0 :             return true;
     141           0 :         pEncInfo->Flags = 0;
     142             : 
     143           0 :         return false;
     144             :     }
     145             : 
     146      255037 :     if ( pEncInfo->StructSize < 5 )
     147           0 :         return false;
     148      255037 :     pEncInfo->MinimumCharSize = pData->mnMinCharSize;
     149             : 
     150      255037 :     if ( pEncInfo->StructSize < 6 )
     151           0 :         return true;
     152      255037 :     pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
     153             : 
     154      255037 :     if ( pEncInfo->StructSize < 7 )
     155           0 :         return true;
     156      255037 :     pEncInfo->AverageCharSize = pData->mnAveCharSize;
     157             : 
     158      255037 :     if ( pEncInfo->StructSize < 12 )
     159           0 :         return true;
     160      255037 :     pEncInfo->Flags = pData->mnInfoFlags;
     161             : 
     162      255037 :     return true;
     163             : }
     164             : 
     165             : /* ======================================================================= */
     166             : /*
                        * Maps a Windows charset id to an rtl_TextEncoding. The trailing comment
                        * on each case names the Windows constant the number stands for.
                        *
                        * Returns RTL_TEXTENCODING_DONTKNOW for any value not listed.
                        */
     167           0 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
     168             : {
     169             :     rtl_TextEncoding eTextEncoding;
     170             : 
     171           0 :     switch ( nWinCharset )
     172             :     {
     173           0 :         case 0:     eTextEncoding = RTL_TEXTENCODING_MS_1252; break;    /* ANSI_CHARSET */
     174           0 :         case 2:     eTextEncoding = RTL_TEXTENCODING_SYMBOL; break;     /* SYMBOL_CHARSET */
     175           0 :         case 77:    eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
     176           0 :         case 128:   eTextEncoding = RTL_TEXTENCODING_MS_932; break;     /* SHIFTJIS_CHARSET */
     177           0 :         case 129:   eTextEncoding = RTL_TEXTENCODING_MS_949; break;     /* HANGEUL_CHARSET */
     178           0 :         case 130:   eTextEncoding = RTL_TEXTENCODING_MS_1361; break;    /* JOHAB_CHARSET */
     179           0 :         case 134:   eTextEncoding = RTL_TEXTENCODING_MS_936; break;     /* GB2312_CHARSET */
     180           0 :         case 136:   eTextEncoding = RTL_TEXTENCODING_MS_950; break;     /* CHINESEBIG5_CHARSET */
     181           0 :         case 161:   eTextEncoding = RTL_TEXTENCODING_MS_1253; break;    /* GREEK_CHARSET */
     182           0 :         case 162:   eTextEncoding = RTL_TEXTENCODING_MS_1254; break;    /* TURKISH_CHARSET */
     183           0 :         case 163:   eTextEncoding = RTL_TEXTENCODING_MS_1258; break;    /* VIETNAMESE_CHARSET !!! */
     184           0 :         case 177:   eTextEncoding = RTL_TEXTENCODING_MS_1255; break;    /* HEBREW_CHARSET */
     185           0 :         case 178:   eTextEncoding = RTL_TEXTENCODING_MS_1256; break;    /* ARABIC_CHARSET */
     186           0 :         case 186:   eTextEncoding = RTL_TEXTENCODING_MS_1257; break;    /* BALTIC_CHARSET */
     187           0 :         case 204:   eTextEncoding = RTL_TEXTENCODING_MS_1251; break;    /* RUSSIAN_CHARSET */
     188           0 :         case 222:   eTextEncoding = RTL_TEXTENCODING_MS_874; break;     /* THAI_CHARSET */
     189           0 :         case 238:   eTextEncoding = RTL_TEXTENCODING_MS_1250; break;    /* EASTEUROPE_CHARSET */
     190           0 :         case 255:   eTextEncoding = RTL_TEXTENCODING_IBM_850; break;    /* OEM_CHARSET */
     191           0 :         default:    eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;   /* not a listed charset id */
     192             :     };
     193             : 
     194           0 :     return eTextEncoding;
     195             : }
     196             : 
     197             : /* ----------------------------------------------------------------------- */
     198             : /*
                        * Maps a Unix (X11 font) charset name to an rtl_TextEncoding.
                        *
                        * pUnixCharset is handed straight to strlen, so it has to be a valid
                        * NUL-terminated string (no null check is done). The name is copied to a
                        * heap buffer with A-Z lowered, then split at the first '-' into a first
                        * and a second part.
                        *
                        * The first part is tested against aUnixCharsetFirstPartTab with
                        * Impl_matchString; the first row that passes selects a second-part
                        * table, which is searched the same way with the second part. When no
                        * second-part row passes, the encoding stored in that table's NULL
                        * terminator row is returned instead.
                        *
                        * Returns RTL_TEXTENCODING_DONTKNOW when the name has no '-' or when no
                        * first-part row passes.
                        *
                        * NOTE(review): rows such as "jisx0208.1983" and the second "iso10646"
                        * sit behind an earlier row that already passes for the same input, so
                        * they look unreachable - confirm whether that is intended.
                        * NOTE(review): the strlen result is stored in a sal_uInt32; presumably
                        * that is narrower than size_t on 64-bit builds - confirm.
                        */
     199           0 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
     200             : {
     201             :     /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
     202             :      * (Registry and Encoding) Names").
     203             :      */
     204             : 
     205             :     /* All identifiers in the tables are lower case. The function takes the */
     206             :     /* first table row that passes, and the comparison is a prefix test. */
     207             :     /* Row order therefore matters: longer names first (14 before 1). */
     208             : 
     209             :     static ImplStrCharsetDef const aUnixCharsetISOTab[] =
     210             :     {
     211             :         { "15", RTL_TEXTENCODING_ISO_8859_15 },
     212             :         { "14", RTL_TEXTENCODING_ISO_8859_14 },
     213             :         { "13", RTL_TEXTENCODING_ISO_8859_13 },
     214             :         { "11", RTL_TEXTENCODING_TIS_620 },
     215             :         { "10", RTL_TEXTENCODING_ISO_8859_10 },
     216             :         { "1", RTL_TEXTENCODING_ISO_8859_1 },
     217             :         { "2", RTL_TEXTENCODING_ISO_8859_2 },
     218             :         { "3", RTL_TEXTENCODING_ISO_8859_3 },
     219             :         { "4", RTL_TEXTENCODING_ISO_8859_4 },
     220             :         { "5", RTL_TEXTENCODING_ISO_8859_5 },
     221             :         { "6", RTL_TEXTENCODING_ISO_8859_6 },
     222             :         { "7", RTL_TEXTENCODING_ISO_8859_7 },
     223             :         { "8", RTL_TEXTENCODING_ISO_8859_8 },
     224             :         { "9", RTL_TEXTENCODING_ISO_8859_9 },
     225             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     226             :     };
     227             : 
     228             :     static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
     229             :     {
     230             :         { "fontspecific", RTL_TEXTENCODING_SYMBOL },
     231             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     232             :     };
     233             : 
     234             :     static ImplStrCharsetDef const aUnixCharsetMSTab[] =
     235             :     {
     236             :         { "1252", RTL_TEXTENCODING_MS_1252 },
     237             :         { "1250", RTL_TEXTENCODING_MS_1250 },
     238             :         { "1251", RTL_TEXTENCODING_MS_1251 },
     239             :         { "1253", RTL_TEXTENCODING_MS_1253 },
     240             :         { "1254", RTL_TEXTENCODING_MS_1254 },
     241             :         { "1255", RTL_TEXTENCODING_MS_1255 },
     242             :         { "1256", RTL_TEXTENCODING_MS_1256 },
     243             :         { "1257", RTL_TEXTENCODING_MS_1257 },
     244             :         { "1258", RTL_TEXTENCODING_MS_1258 },
     245             :         { "932", RTL_TEXTENCODING_MS_932 },
     246             :         { "936", RTL_TEXTENCODING_MS_936 },
     247             :         { "949", RTL_TEXTENCODING_MS_949 },
     248             :         { "950", RTL_TEXTENCODING_MS_950 },
     249             :         { "1361", RTL_TEXTENCODING_MS_1361 },
     250             :         { "cp1252", RTL_TEXTENCODING_MS_1252 },
     251             :         { "cp1250", RTL_TEXTENCODING_MS_1250 },
     252             :         { "cp1251", RTL_TEXTENCODING_MS_1251 },
     253             :         { "cp1253", RTL_TEXTENCODING_MS_1253 },
     254             :         { "cp1254", RTL_TEXTENCODING_MS_1254 },
     255             :         { "cp1255", RTL_TEXTENCODING_MS_1255 },
     256             :         { "cp1256", RTL_TEXTENCODING_MS_1256 },
     257             :         { "cp1257", RTL_TEXTENCODING_MS_1257 },
     258             :         { "cp1258", RTL_TEXTENCODING_MS_1258 },
     259             :         { "cp932", RTL_TEXTENCODING_MS_932 },
     260             :         { "cp936", RTL_TEXTENCODING_MS_936 },
     261             :         { "cp949", RTL_TEXTENCODING_MS_949 },
     262             :         { "cp950", RTL_TEXTENCODING_MS_950 },
     263             :         { "cp1361", RTL_TEXTENCODING_MS_1361 },
     264             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     265             :     };
     266             : 
     267             :     static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
     268             :     {
     269             :         { "437", RTL_TEXTENCODING_IBM_437 },
     270             :         { "850", RTL_TEXTENCODING_IBM_850 },
     271             :         { "860", RTL_TEXTENCODING_IBM_860 },
     272             :         { "861", RTL_TEXTENCODING_IBM_861 },
     273             :         { "863", RTL_TEXTENCODING_IBM_863 },
     274             :         { "865", RTL_TEXTENCODING_IBM_865 },
     275             :         { "737", RTL_TEXTENCODING_IBM_737 },
     276             :         { "775", RTL_TEXTENCODING_IBM_775 },
     277             :         { "852", RTL_TEXTENCODING_IBM_852 },
     278             :         { "855", RTL_TEXTENCODING_IBM_855 },
     279             :         { "857", RTL_TEXTENCODING_IBM_857 },
     280             :         { "862", RTL_TEXTENCODING_IBM_862 },
     281             :         { "864", RTL_TEXTENCODING_IBM_864 },
     282             :         { "866", RTL_TEXTENCODING_IBM_866 },
     283             :         { "869", RTL_TEXTENCODING_IBM_869 },
     284             :         { "874", RTL_TEXTENCODING_MS_874 },
     285             :         { "1004", RTL_TEXTENCODING_MS_1252 },
     286             :         { "65400", RTL_TEXTENCODING_SYMBOL },
     287             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     288             :     };
     289             : 
     290             :     static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
     291             :     {
     292             :         { "r", RTL_TEXTENCODING_KOI8_R },
     293             :         { "u", RTL_TEXTENCODING_KOI8_U },
     294             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     295             :     };
     296             : 
     297             :     static ImplStrCharsetDef const aUnixCharsetJISX0208Tab[] =
     298             :     {
     299             :         { NULL, RTL_TEXTENCODING_JIS_X_0208 }
     300             :     };
     301             : 
     302             :     static ImplStrCharsetDef const aUnixCharsetJISX0201Tab[] =
     303             :     {
     304             :         { NULL, RTL_TEXTENCODING_JIS_X_0201 }
     305             :     };
     306             : 
     307             :     static ImplStrCharsetDef const aUnixCharsetJISX0212Tab[] =
     308             :     {
     309             :         { NULL, RTL_TEXTENCODING_JIS_X_0212 }
     310             :     };
     311             : 
     312             :     static ImplStrCharsetDef const aUnixCharsetGBTab[] =
     313             :     {
     314             :         { NULL, RTL_TEXTENCODING_GB_2312 }
     315             :     };
     316             : 
     317             :     static ImplStrCharsetDef const aUnixCharsetGBKTab[] =
     318             :     {
     319             :         { NULL, RTL_TEXTENCODING_GBK }
     320             :     };
     321             : 
     322             :     static ImplStrCharsetDef const aUnixCharsetBIG5Tab[] =
     323             :     {
     324             :         { NULL, RTL_TEXTENCODING_BIG5 }
     325             :     };
     326             : 
     327             :     static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
     328             :     {
     329             :         { NULL, RTL_TEXTENCODING_EUC_KR }
     330             :     };
     331             : 
     332             :     static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
     333             :     {
     334             :         { NULL, RTL_TEXTENCODING_MS_1361 }
     335             :     };
     336             : 
     337             :     static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
     338             :     {
     339             :         { NULL, RTL_TEXTENCODING_UNICODE }
     340             :     };
     341             : 
     342             :     static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
     343             :     {
     344             : /* Currently every Unicode encoding is treated as plain Unicode here */
     345             : /*        { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
     346             :         { NULL, RTL_TEXTENCODING_UNICODE }
     347             :     };
     348             : 
     349             :     static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
     350             :     {
     351             :         { NULL, RTL_TEXTENCODING_SYMBOL }
     352             :     };
     353             : 
     354             :     /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
     355             :        rev=1.1.1.1>: */
     356             :     static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
     357             :     {
     358             :         { "0", RTL_TEXTENCODING_TIS_620 },
     359             :         { "2529", RTL_TEXTENCODING_TIS_620 },
     360             :         { "2533", RTL_TEXTENCODING_TIS_620 },
     361             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     362             :     };
     363             :     static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
     364             :     {
     365             :         { "1", RTL_TEXTENCODING_TIS_620 },
     366             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     367             :     };
     368             :     static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
     369             :     {
     370             :         { "0", RTL_TEXTENCODING_TIS_620 },
     371             :         { "1", RTL_TEXTENCODING_TIS_620 },
     372             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     373             :     };
     374             : 
     375             :     static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
     376             :     {
     377             :         { "iso8859", aUnixCharsetISOTab },
     378             :         { "adobe", aUnixCharsetADOBETab },
     379             :         { "ansi", aUnixCharsetMSTab },
     380             :         { "microsoft", aUnixCharsetMSTab },
     381             :         { "ibm", aUnixCharsetIBMTab },
     382             :         { "koi8", aUnixCharsetKOI8Tab },
     383             :         { "jisx0208", aUnixCharsetJISX0208Tab },
     384             :         { "jisx0208.1983", aUnixCharsetJISX0208Tab },
     385             :         { "jisx0201", aUnixCharsetJISX0201Tab },
     386             :         { "jisx0201.1976", aUnixCharsetJISX0201Tab },
     387             :         { "jisx0212", aUnixCharsetJISX0212Tab },
     388             :         { "jisx0212.1990", aUnixCharsetJISX0212Tab },
     389             :         { "gb2312", aUnixCharsetGBTab },
     390             :         { "gbk", aUnixCharsetGBKTab },
     391             :         { "big5", aUnixCharsetBIG5Tab },
     392             :         { "iso10646", aUnixCharsetISO10646Tab },
     393             : /*      { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
     394             :         { "sunolcursor", aUnixCharsetSymbolTab },
     395             :         { "sunolglyph", aUnixCharsetSymbolTab },
     396             :         { "iso10646", aUnixCharsetUNICODETab },
     397             :         { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
     398             :         { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
     399             :         { "tis620.2529", aUnixCharsetTIS6202529Tab },
     400             :         { "tis620.2533", aUnixCharsetTIS6202533Tab },
     401             :         { "tis620", aUnixCharsetTIS620Tab },
     402             : /*        { "sunudcja.1997",  },        */
     403             : /*        { "sunudcko.1997",  },        */
     404             : /*        { "sunudczh.1997",  },        */
     405             : /*        { "sunudczhtw.1997",  },      */
     406             :         { NULL, NULL }
     407             :     };
     408             : 
     409           0 :     rtl_TextEncoding    eEncoding = RTL_TEXTENCODING_DONTKNOW;
     410             :     char*           pBuf;
     411             :     char*           pTempBuf;
     412           0 :     sal_uInt32          nBufLen = strlen( pUnixCharset )+1;
     413             :     const char*     pFirstPart;
     414             :     const char*     pSecondPart;
     415             : 
     416             :     /* Allocate a private copy of the name and lower-case it (A-Z only) */
     417           0 :     pBuf = new char[nBufLen];
     418           0 :     Impl_toAsciiLower( pUnixCharset, pBuf );
     419             : 
     420             :     /* Split the copy at the first '-': the text before it is the first part */
     421           0 :     pFirstPart = pBuf;
     422           0 :     pSecondPart = NULL;
     423           0 :     pTempBuf = pBuf;
     424           0 :     while ( *pTempBuf )
     425             :     {
     426           0 :         if ( *pTempBuf == '-' )
     427             :         {
     428           0 :             *pTempBuf = '\0';
     429           0 :             pSecondPart = pTempBuf+1;
     430           0 :             break;
     431             :         }
     432             : 
     433           0 :         pTempBuf++;
     434             :     }
     435             : 
     436             :     /* Part separator found */
     437           0 :     if ( pSecondPart )
     438             :     {
     439             :         /* Find the first row whose name is a prefix of the first part */
     440           0 :         const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
     441           0 :         while ( pFirstPartData->mpCharsetStr )
     442             :         {
     443           0 :             if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
     444             :             {
     445             :                 /* Find the first row whose name is a prefix of the second part */
     446           0 :                 const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
     447           0 :                 while ( pData->mpCharsetStr )
     448             :                 {
     449           0 :                     if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
     450             :                     {
     451           0 :                         break;
     452             :                     }
     453             : 
     454           0 :                     pData++;
     455             :                 }
     456             : 
     457             :                 /* pData is the matching row or, failing that, the NULL terminator row, whose encoding is the default for this first part */
     458           0 :                 eEncoding = pData->meTextEncoding;
     459           0 :                 break;
     460             :             }
     461             : 
     462           0 :             pFirstPartData++;
     463             :         }
     464             :     }
     465             : 
     466           0 :     delete[] pBuf;
     467             : 
     468           0 :     return eEncoding;
     469             : }
     470             : 
     471             : /* ----------------------------------------------------------------------- */
     472             : 
     473           0 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
     474             : {
     475             :     /* All identifiers are in lower case and contain only alphanumeric */
     476             :     /* characters. The function searches for the first equal string in */
     477             :     /* the table. This table holds only the most used MIME charset names. */
     478             :     /* Sort order: important */
     479             :     static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
     480             :     {
     481             :         { "usascii", RTL_TEXTENCODING_ASCII_US },
     482             :         { "utf8", RTL_TEXTENCODING_UTF8 },
     483             :         { "utf7", RTL_TEXTENCODING_UTF7 },
     484             :         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
     485             :         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
     486             :         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
     487             :         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
     488             :         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
     489             :         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
     490             :         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
     491             :         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
     492             :         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
     493             :         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
     494             :         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
     495             :         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
     496             :         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
     497             :         { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
     498             :         { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
     499             :         { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
     500             :         { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
     501             :         { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
     502             :         { "eucjp", RTL_TEXTENCODING_EUC_JP },
     503             :         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     504             :         { "mskanji", RTL_TEXTENCODING_MS_932 },
     505             :         { "gb2312", RTL_TEXTENCODING_GB_2312 },
     506             :         { "cngb", RTL_TEXTENCODING_GB_2312 },
     507             :         { "big5", RTL_TEXTENCODING_BIG5 },
     508             :         { "cnbig5", RTL_TEXTENCODING_BIG5 },
     509             :         { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
     510             :         { "euckr", RTL_TEXTENCODING_EUC_KR },
     511             :         { "koi8r", RTL_TEXTENCODING_KOI8_R },
     512             :         { "windows1252", RTL_TEXTENCODING_MS_1252 },
     513             :         { "windows1250", RTL_TEXTENCODING_MS_1250 },
     514             :         { "windows1251", RTL_TEXTENCODING_MS_1251 },
     515             :         { "windows1253", RTL_TEXTENCODING_MS_1253 },
     516             :         { "windows1254", RTL_TEXTENCODING_MS_1254 },
     517             :         { "windows1255", RTL_TEXTENCODING_MS_1255 },
     518             :         { "windows1256", RTL_TEXTENCODING_MS_1256 },
     519             :         { "windows1257", RTL_TEXTENCODING_MS_1257 },
     520             :         { "windows1258", RTL_TEXTENCODING_MS_1258 },
     521             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     522             :     };
     523             : 
     524             :     /* All identifiers are in lower case and contain only alphanumeric */
     525             :     /* characters. The function searches for the first matching string in */
     526             :     /* the table. */
     527             :     /* Sort order: unique (first iso885914, then iso88591), important */
     528             :     static ImplStrCharsetDef const aMimeCharsetTab[] =
     529             :     {
     530             :         { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
     531             :         { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
     532             :         { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
     533             :         { "iso88591win", RTL_TEXTENCODING_MS_1252 },
     534             :         { "iso88592win", RTL_TEXTENCODING_MS_1250 },
     535             :         { "iso88599win", RTL_TEXTENCODING_MS_1254 },
     536             :         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
     537             :         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
     538             :         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
     539             :         { "iso885911", RTL_TEXTENCODING_TIS_620 },
     540             :             /* This is no official MIME character set name, but it might be in
     541             :                use in Thailand. */
     542             :         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
     543             :         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
     544             :         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
     545             :         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
     546             :         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
     547             :         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
     548             :         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
     549             :         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
     550             :         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
     551             :         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
     552             :         { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
     553             :         { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
     554             :         { "l1", RTL_TEXTENCODING_ISO_8859_1 },
     555             :         { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
     556             :         { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
     557             :         { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
     558             :         { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
     559             :         { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
     560             :         { "l2", RTL_TEXTENCODING_ISO_8859_2 },
     561             :         { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
     562             :         { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
     563             :         { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
     564             :         { "l3", RTL_TEXTENCODING_ISO_8859_3 },
     565             :         { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
     566             :         { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
     567             :         { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
     568             :         { "l4", RTL_TEXTENCODING_ISO_8859_4 },
     569             :         { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
     570             :         { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
     571             :         { "cyrillicasian", RTL_TEXTENCODING_PT154 },
     572             :         { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
     573             :         { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
     574             :         { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
     575             :         { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
     576             :         { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
     577             :         { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
     578             :         { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
     579             :         { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
     580             :         { "greek", RTL_TEXTENCODING_ISO_8859_7 },
     581             :         { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
     582             :         { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
     583             :         { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
     584             :         { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
     585             :         { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
     586             :         { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
     587             :         { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
     588             :         { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
     589             :         { "l5", RTL_TEXTENCODING_ISO_8859_9 },
     590             :         { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
     591             :         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
     592             :         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
     593             :         { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
     594             :         { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
     595             :         { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
     596             :         { "iso10646us", RTL_TEXTENCODING_ASCII_US },
     597             :         { "iso646irv", RTL_TEXTENCODING_ASCII_US },
     598             :         { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
     599             :         { "ibm437", RTL_TEXTENCODING_IBM_437 },
     600             :         { "cp437", RTL_TEXTENCODING_IBM_437 },
     601             :         { "437", RTL_TEXTENCODING_IBM_437 },
     602             :         { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
     603             :         { "ansix34", RTL_TEXTENCODING_ASCII_US },
     604             :         { "ibm367", RTL_TEXTENCODING_ASCII_US },
     605             :         { "cp367", RTL_TEXTENCODING_ASCII_US },
     606             :         { "csascii", RTL_TEXTENCODING_ASCII_US },
     607             :         { "ibm775", RTL_TEXTENCODING_IBM_775 },
     608             :         { "cp775", RTL_TEXTENCODING_IBM_775 },
     609             :         { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
     610             :         { "ibm850", RTL_TEXTENCODING_IBM_850 },
     611             :         { "cp850", RTL_TEXTENCODING_IBM_850 },
     612             :         { "850", RTL_TEXTENCODING_IBM_850 },
     613             :         { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
     614             : /*        { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
     615             : /*        { "cp851", RTL_TEXTENCODING_IBM_851 }, */
     616             : /*        { "851", RTL_TEXTENCODING_IBM_851 }, */
     617             : /*        { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
     618             :         { "ibm852", RTL_TEXTENCODING_IBM_852 },
     619             :         { "cp852", RTL_TEXTENCODING_IBM_852 },
     620             :         { "852", RTL_TEXTENCODING_IBM_852 },
     621             :         { "cspcp852", RTL_TEXTENCODING_IBM_852 },
     622             :         { "ibm855", RTL_TEXTENCODING_IBM_855 },
     623             :         { "cp855", RTL_TEXTENCODING_IBM_855 },
     624             :         { "855", RTL_TEXTENCODING_IBM_855 },
     625             :         { "csibm855", RTL_TEXTENCODING_IBM_855 },
     626             :         { "ibm857", RTL_TEXTENCODING_IBM_857 },
     627             :         { "cp857", RTL_TEXTENCODING_IBM_857 },
     628             :         { "857", RTL_TEXTENCODING_IBM_857 },
     629             :         { "csibm857", RTL_TEXTENCODING_IBM_857 },
     630             :         { "ibm860", RTL_TEXTENCODING_IBM_860 },
     631             :         { "cp860", RTL_TEXTENCODING_IBM_860 },
     632             :         { "860", RTL_TEXTENCODING_IBM_860 },
     633             :         { "csibm860", RTL_TEXTENCODING_IBM_860 },
     634             :         { "ibm861", RTL_TEXTENCODING_IBM_861 },
     635             :         { "cp861", RTL_TEXTENCODING_IBM_861 },
     636             :         { "861", RTL_TEXTENCODING_IBM_861 },
     637             :         { "csis", RTL_TEXTENCODING_IBM_861 },
     638             :         { "csibm861", RTL_TEXTENCODING_IBM_861 },
     639             :         { "ibm862", RTL_TEXTENCODING_IBM_862 },
     640             :         { "cp862", RTL_TEXTENCODING_IBM_862 },
     641             :         { "862", RTL_TEXTENCODING_IBM_862 },
     642             :         { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
     643             :         { "ibm863", RTL_TEXTENCODING_IBM_863 },
     644             :         { "cp863", RTL_TEXTENCODING_IBM_863 },
     645             :         { "863", RTL_TEXTENCODING_IBM_863 },
     646             :         { "csibm863", RTL_TEXTENCODING_IBM_863 },
     647             :         { "ibm864", RTL_TEXTENCODING_IBM_864 },
     648             :         { "cp864", RTL_TEXTENCODING_IBM_864 },
     649             :         { "864", RTL_TEXTENCODING_IBM_864 },
     650             :         { "csibm864", RTL_TEXTENCODING_IBM_864 },
     651             :         { "ibm865", RTL_TEXTENCODING_IBM_865 },
     652             :         { "cp865", RTL_TEXTENCODING_IBM_865 },
     653             :         { "865", RTL_TEXTENCODING_IBM_865 },
     654             :         { "csibm865", RTL_TEXTENCODING_IBM_865 },
     655             :         { "ibm866", RTL_TEXTENCODING_IBM_866 },
     656             :         { "cp866", RTL_TEXTENCODING_IBM_866 },
     657             :         { "866", RTL_TEXTENCODING_IBM_866 },
     658             :         { "csibm866", RTL_TEXTENCODING_IBM_866 },
     659             : /*        { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
     660             : /*        { "cp868", RTL_TEXTENCODING_IBM_868 }, */
     661             : /*        { "cpar", RTL_TEXTENCODING_IBM_868 }, */
     662             : /*        { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
     663             :         { "ibm869", RTL_TEXTENCODING_IBM_869 },
     664             :         { "cp869", RTL_TEXTENCODING_IBM_869 },
     665             :         { "869", RTL_TEXTENCODING_IBM_869 },
     666             :         { "cpgr", RTL_TEXTENCODING_IBM_869 },
     667             :         { "csibm869", RTL_TEXTENCODING_IBM_869 },
     668             :         { "ibm869", RTL_TEXTENCODING_IBM_869 },
     669             :         { "cp869", RTL_TEXTENCODING_IBM_869 },
     670             :         { "869", RTL_TEXTENCODING_IBM_869 },
     671             :         { "cpgr", RTL_TEXTENCODING_IBM_869 },
     672             :         { "csibm869", RTL_TEXTENCODING_IBM_869 },
     673             :         { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
     674             :         { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
     675             :         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     676             :         { "mskanji", RTL_TEXTENCODING_MS_932 },
     677             :         { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
     678             :         { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
     679             :         { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
     680             :         { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
     681             :         { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
     682             :         { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
     683             :         { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
     684             :         { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
     685             :         { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
     686             :         { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
     687             :         { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
     688             :         { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
     689             :         { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
     690             :         { "isoir6", RTL_TEXTENCODING_ASCII_US },
     691             :         { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
     692             :         { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
     693             :         { "ascii", RTL_TEXTENCODING_ASCII_US },
     694             :         { "us", RTL_TEXTENCODING_ASCII_US },
     695             :         { "gb180302000", RTL_TEXTENCODING_GB_18030 },
     696             :             /* This is no actual MIME character set name, it is only in here
     697             :                for backwards compatibility (before "GB18030" was officially
     698             :                registered with IANA, this code contained some guesses of what
     699             :                would become official names for GB18030). */
     700             :         { "gb18030", RTL_TEXTENCODING_GB_18030 },
     701             :         { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
     702             :         { "tis620", RTL_TEXTENCODING_TIS_620 },
     703             :         { "gbk", RTL_TEXTENCODING_GBK },
     704             :         { "cp936", RTL_TEXTENCODING_GBK },
     705             :         { "ms936", RTL_TEXTENCODING_GBK },
     706             :         { "windows936", RTL_TEXTENCODING_GBK },
     707             :         { "cp874", RTL_TEXTENCODING_MS_874 },
     708             :             /* This is no official MIME character set name, but it might be in
     709             :                use in Thailand. */
     710             :         { "ms874", RTL_TEXTENCODING_MS_874 },
     711             :             /* This is no official MIME character set name, but it might be in
     712             :                use in Thailand. */
     713             :         { "windows874", RTL_TEXTENCODING_MS_874 },
     714             :             /* This is no official MIME character set name, but it might be in
     715             :                use in Thailand. */
     716             :         { "koi8u", RTL_TEXTENCODING_KOI8_U },
     717             :         { "cpis", RTL_TEXTENCODING_IBM_861 },
     718             :         { "ksc56011987", RTL_TEXTENCODING_MS_949 },
     719             :         { "isoir149", RTL_TEXTENCODING_MS_949 },
     720             :         { "ksc56011989", RTL_TEXTENCODING_MS_949 },
     721             :         { "ksc5601", RTL_TEXTENCODING_MS_949 },
     722             :         { "korean", RTL_TEXTENCODING_MS_949 },
     723             :         { "csksc56011987", RTL_TEXTENCODING_MS_949 },
     724             :             /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
     725             :                this character set identifier seems to be prominently used by MS
     726             :                to stand for KS C 5601 plus MS-949 extensions */
     727             :         { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
     728             :         { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
     729             :         { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
     730             :         { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
     731             :         { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
     732             :         { "ptcp154", RTL_TEXTENCODING_PT154 },
     733             :         { "csptcp154", RTL_TEXTENCODING_PT154 },
     734             :         { "pt154", RTL_TEXTENCODING_PT154 },
     735             :         { "cp154", RTL_TEXTENCODING_PT154 },
     736             :         { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
     737             :             /* This is not an official MIME character set name, but it is in use by
     738             :                various Windows APIs. */
     739             :         { NULL, RTL_TEXTENCODING_DONTKNOW }
     740             :     };
     741             : 
     742           0 :     rtl_TextEncoding            eEncoding = RTL_TEXTENCODING_DONTKNOW;
     743             :     char*                   pBuf;
     744           0 :     const ImplStrCharsetDef*    pData = aVIPMimeCharsetTab;
     745           0 :     sal_uInt32                  nBufLen = strlen( pMimeCharset )+1;
     746             : 
     747             :     /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
     748           0 :     pBuf = new char[nBufLen];
     749           0 :     Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf );
     750             : 
     751             :     /* Search for equal in the VIP table */
     752           0 :     while ( pData->mpCharsetStr )
     753             :     {
     754           0 :         if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 )
     755             :         {
     756           0 :             eEncoding = pData->meTextEncoding;
     757           0 :             break;
     758             :         }
     759             : 
     760           0 :         pData++;
     761             :     }
     762             : 
     763             :     /* Search for matching in the mime table */
     764           0 :     if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
     765             :     {
     766           0 :         pData = aMimeCharsetTab;
     767           0 :         while ( pData->mpCharsetStr )
     768             :         {
     769           0 :             if ( Impl_matchString( pBuf, pData->mpCharsetStr ) )
     770             :             {
     771           0 :                 eEncoding = pData->meTextEncoding;
     772           0 :                 break;
     773             :             }
     774             : 
     775           0 :             pData++;
     776             :         }
     777             :     }
     778             : 
     779           0 :     delete[] pBuf;
     780             : 
     781           0 :     return eEncoding;
     782             : }
     783             : 
     784             : /* ======================================================================= */
     785             : 
     786           0 : sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) /* Returns the mnBestWindowsCharset field of the data record found for eTextEncoding. */
     787             : {
     788           0 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding ); /* null when no data record is available for this encoding */
     789           0 :     if ( pData )
     790           0 :         return pData->mnBestWindowsCharset;
     791             :     else
     792           0 :         return 1; /* fallback when no record was found; NOTE(review): 1 is presumably the Windows DEFAULT_CHARSET value -- confirm */
     793             : }
     794             : 
     795             : /* ----------------------------------------------------------------------- */
     796             : 
     797           0 : const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding  ) /* Returns the mpBestUnixCharset string of the data record found for eTextEncoding, or a fallback when there is no record. */
     798             : {
     799           0 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     800           0 :     if ( pData )
     801           0 :         return (char const *) pData->mpBestUnixCharset;
     802           0 :     else if( eTextEncoding == RTL_TEXTENCODING_UNICODE ) /* reached only when no data record was found */
     803           0 :         return (char const *) "iso10646-1"; /* hard-coded name used for RTL_TEXTENCODING_UNICODE */
     804             :     else
     805           0 :         return 0; /* null pointer: no record and not RTL_TEXTENCODING_UNICODE */
     806             : }
     807             : 
     808             : /* ----------------------------------------------------------------------- */
     809             : 
     810           0 : char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
     811             :                                                              nEncoding) /* Returns mpBestMimeCharset for nEncoding only when its info flags include RTL_TEXTENCODING_INFO_MIME; otherwise NULL. */
     812             : {
     813           0 :     ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
     814           0 :     return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ? /* the "p &&" guard yields NULL when no data record was found */
     815           0 :                p->mpBestMimeCharset : NULL;
     816             : }
     817             : 
     818           0 : const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding ) /* Returns the mpBestMimeCharset string of the data record found for eTextEncoding; no RTL_TEXTENCODING_INFO_MIME flag check is made here. */
     819             : {
     820           0 :     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
     821           0 :     if ( pData )
     822           0 :         return (char const *) pData->mpBestMimeCharset;
     823             :     else
     824           0 :         return 0; /* null pointer: no data record for this encoding */
     825             : }
     826             : 
     827             : /* The following two functions are based on <http://www.sharmahd.com/tm/
     828             :    codepages.html>, <http://msdn.microsoft.com/workshop/author/dhtml/reference/
     829             :    charsets/charset4.asp>, and <http://www.iana.org/assignments/character-sets>.
     830             :  */
     831             : 
     832             : rtl_TextEncoding SAL_CALL /* Maps a Windows code page number to the corresponding rtl_TextEncoding constant. */
     833           0 : rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
     834             : {
     835           0 :     switch (nCodePage)
     836             :     {
     837           0 :     case 437: return RTL_TEXTENCODING_IBM_437;
     838           0 :     case 708: return RTL_TEXTENCODING_ISO_8859_6; /* same result as code page 28596 further down */
     839           0 :     case 737: return RTL_TEXTENCODING_IBM_737;
     840           0 :     case 775: return RTL_TEXTENCODING_IBM_775;
     841           0 :     case 850: return RTL_TEXTENCODING_IBM_850;
     842           0 :     case 852: return RTL_TEXTENCODING_IBM_852;
     843           0 :     case 855: return RTL_TEXTENCODING_IBM_855;
     844           0 :     case 857: return RTL_TEXTENCODING_IBM_857;
     845           0 :     case 860: return RTL_TEXTENCODING_IBM_860;
     846           0 :     case 861: return RTL_TEXTENCODING_IBM_861;
     847           0 :     case 862: return RTL_TEXTENCODING_IBM_862;
     848           0 :     case 863: return RTL_TEXTENCODING_IBM_863;
     849           0 :     case 864: return RTL_TEXTENCODING_IBM_864;
     850           0 :     case 865: return RTL_TEXTENCODING_IBM_865;
     851           0 :     case 866: return RTL_TEXTENCODING_IBM_866;
     852           0 :     case 869: return RTL_TEXTENCODING_IBM_869;
     853           0 :     case 874: return RTL_TEXTENCODING_MS_874;
     854           0 :     case 932: return RTL_TEXTENCODING_MS_932;
     855           0 :     case 936: return RTL_TEXTENCODING_MS_936;
     856           0 :     case 949: return RTL_TEXTENCODING_MS_949;
     857           0 :     case 950: return RTL_TEXTENCODING_MS_950;
     858           0 :     case 1250: return RTL_TEXTENCODING_MS_1250;
     859           0 :     case 1251: return RTL_TEXTENCODING_MS_1251;
     860           0 :     case 1252: return RTL_TEXTENCODING_MS_1252;
     861           0 :     case 1253: return RTL_TEXTENCODING_MS_1253;
     862           0 :     case 1254: return RTL_TEXTENCODING_MS_1254;
     863           0 :     case 1255: return RTL_TEXTENCODING_MS_1255;
     864           0 :     case 1256: return RTL_TEXTENCODING_MS_1256;
     865           0 :     case 1257: return RTL_TEXTENCODING_MS_1257;
     866           0 :     case 1258: return RTL_TEXTENCODING_MS_1258;
     867           0 :     case 1361: return RTL_TEXTENCODING_MS_1361;
     868           0 :     case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
     869           0 :     case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
     870           0 :     case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
     871           0 :     case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
     872           0 :     case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
     873           0 :     case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
     874           0 :     case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
     875           0 :     case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
     876           0 :     case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
     877           0 :     case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
     878           0 :     case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
     879           0 :     case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
     880           0 :     case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
     881           0 :     case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
     882           0 :     case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
     883           0 :     case 20127: return RTL_TEXTENCODING_ASCII_US;
     884           0 :     case 20866: return RTL_TEXTENCODING_KOI8_R;
     885           0 :     case 21866: return RTL_TEXTENCODING_KOI8_U;
     886           0 :     case 28591: return RTL_TEXTENCODING_ISO_8859_1;
     887           0 :     case 28592: return RTL_TEXTENCODING_ISO_8859_2;
     888           0 :     case 28593: return RTL_TEXTENCODING_ISO_8859_3;
     889           0 :     case 28594: return RTL_TEXTENCODING_ISO_8859_4;
     890           0 :     case 28595: return RTL_TEXTENCODING_ISO_8859_5;
     891           0 :     case 28596: return RTL_TEXTENCODING_ISO_8859_6;
     892           0 :     case 28597: return RTL_TEXTENCODING_ISO_8859_7;
     893           0 :     case 28598: return RTL_TEXTENCODING_ISO_8859_8;
     894           0 :     case 28599: return RTL_TEXTENCODING_ISO_8859_9;
     895           0 :     case 28605: return RTL_TEXTENCODING_ISO_8859_15;
     896           0 :     case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
     897           0 :     case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
     898           0 :     case 51932: return RTL_TEXTENCODING_EUC_JP;
     899           0 :     case 51936: return RTL_TEXTENCODING_EUC_CN;
     900           0 :     case 51949: return RTL_TEXTENCODING_EUC_KR;
     901           0 :     case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
     902           0 :     case 65000: return RTL_TEXTENCODING_UTF7;
     903           0 :     case 65001: return RTL_TEXTENCODING_UTF8;
     904           0 :     default: return RTL_TEXTENCODING_DONTKNOW; /* any code page not listed above */
     905             :     }
     906             : }
     907             : 
     908             : sal_uInt32 SAL_CALL /* Maps an rtl_TextEncoding constant to a Windows code page number. */
     909           0 : rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
     910             : {
     911           0 :     switch (nEncoding)
     912             :     {
     913           0 :     case RTL_TEXTENCODING_IBM_437: return 437;
     914             :  /* case RTL_TEXTENCODING_ISO_8859_6: return 708; -- left disabled: RTL_TEXTENCODING_ISO_8859_6 is mapped to 28596 further down */
     915           0 :     case RTL_TEXTENCODING_IBM_737: return 737;
     916           0 :     case RTL_TEXTENCODING_IBM_775: return 775;
     917           0 :     case RTL_TEXTENCODING_IBM_850: return 850;
     918           0 :     case RTL_TEXTENCODING_IBM_852: return 852;
     919           0 :     case RTL_TEXTENCODING_IBM_855: return 855;
     920           0 :     case RTL_TEXTENCODING_IBM_857: return 857;
     921           0 :     case RTL_TEXTENCODING_IBM_860: return 860;
     922           0 :     case RTL_TEXTENCODING_IBM_861: return 861;
     923           0 :     case RTL_TEXTENCODING_IBM_862: return 862;
     924           0 :     case RTL_TEXTENCODING_IBM_863: return 863;
     925           0 :     case RTL_TEXTENCODING_IBM_864: return 864;
     926           0 :     case RTL_TEXTENCODING_IBM_865: return 865;
     927           0 :     case RTL_TEXTENCODING_IBM_866: return 866;
     928           0 :     case RTL_TEXTENCODING_IBM_869: return 869;
     929           0 :     case RTL_TEXTENCODING_MS_874: return 874;
     930           0 :     case RTL_TEXTENCODING_MS_932: return 932;
     931           0 :     case RTL_TEXTENCODING_MS_936: return 936;
     932           0 :     case RTL_TEXTENCODING_MS_949: return 949;
     933           0 :     case RTL_TEXTENCODING_MS_950: return 950;
     934           0 :     case RTL_TEXTENCODING_MS_1250: return 1250;
     935           0 :     case RTL_TEXTENCODING_MS_1251: return 1251;
     936           0 :     case RTL_TEXTENCODING_MS_1252: return 1252;
     937           0 :     case RTL_TEXTENCODING_MS_1253: return 1253;
     938           0 :     case RTL_TEXTENCODING_MS_1254: return 1254;
     939           0 :     case RTL_TEXTENCODING_MS_1255: return 1255;
     940           0 :     case RTL_TEXTENCODING_MS_1256: return 1256;
     941           0 :     case RTL_TEXTENCODING_MS_1257: return 1257;
     942           0 :     case RTL_TEXTENCODING_MS_1258: return 1258;
     943           0 :     case RTL_TEXTENCODING_MS_1361: return 1361;
     944           0 :     case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
     945           0 :     case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
     946           0 :     case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
     947           0 :     case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
     948           0 :     case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
     949           0 :     case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
     950           0 :     case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
     951           0 :     case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
     952           0 :     case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
     953           0 :     case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
     954           0 :     case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
     955           0 :     case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
     956           0 :     case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
     957           0 :     case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
     958           0 :     case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
     959           0 :     case RTL_TEXTENCODING_ASCII_US: return 20127;
     960           0 :     case RTL_TEXTENCODING_KOI8_R: return 20866;
     961           0 :     case RTL_TEXTENCODING_KOI8_U: return 21866;
     962           0 :     case RTL_TEXTENCODING_ISO_8859_1: return 28591;
     963           0 :     case RTL_TEXTENCODING_ISO_8859_2: return 28592;
     964           0 :     case RTL_TEXTENCODING_ISO_8859_3: return 28593;
     965           0 :     case RTL_TEXTENCODING_ISO_8859_4: return 28594;
     966           0 :     case RTL_TEXTENCODING_ISO_8859_5: return 28595;
     967           0 :     case RTL_TEXTENCODING_ISO_8859_6: return 28596;
     968           0 :     case RTL_TEXTENCODING_ISO_8859_7: return 28597;
     969           0 :     case RTL_TEXTENCODING_ISO_8859_8: return 28598;
     970           0 :     case RTL_TEXTENCODING_ISO_8859_9: return 28599;
     971           0 :     case RTL_TEXTENCODING_ISO_8859_15: return 28605;
     972           0 :     case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
     973           0 :     case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
     974           0 :     case RTL_TEXTENCODING_EUC_JP: return 51932;
     975           0 :     case RTL_TEXTENCODING_EUC_CN: return 51936;
     976           0 :     case RTL_TEXTENCODING_EUC_KR: return 51949;
     977           0 :     case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
     978           0 :     case RTL_TEXTENCODING_UTF7: return 65000;
     979           0 :     case RTL_TEXTENCODING_UTF8: return 65001;
     980           0 :     default: return 0; /* 0 is returned for every encoding not listed above */
     981             :     }
     982             : }
     983             : 
     984             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10