Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include <cstddef>
23 : #include <cstring>
24 :
25 : #include "rtl/tencinfo.h"
26 :
27 : #include "gettextencodingdata.hxx"
28 : #include "tenchelp.hxx"
29 :
30 5 : sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
31 : {
32 : return
33 : nEncoding > RTL_TEXTENCODING_DONTKNOW
34 : && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM
35 5 : && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // always update this!
36 : }
37 :
38 : /* ======================================================================= */
39 :
/* Copies the NUL-terminated string pName into pBuf, replacing the ASCII
   letters A-Z (0x41..0x5A) by a-z; every other character is copied as is.
   pBuf must have room for strlen(pName)+1 characters. */
static void Impl_toAsciiLower( const char* pName, char* pBuf )
{
    for ( ; *pName != '\0'; ++pName, ++pBuf )
    {
        const char c = *pName;
        /* 0x20 is the distance between an upper case ASCII letter and its
           lower case counterpart */
        *pBuf = ( c >= 0x41 && c <= 0x5A ) ? (char)( c + 0x20 ) : c;
    }

    *pBuf = '\0';
}
56 :
57 : /* ----------------------------------------------------------------------- */
58 :
/* Writes a normalized copy of the NUL-terminated string pName to pBuf:
   ASCII letters A-Z are lowered, a-z and 0-9 are kept, and every other
   character is dropped. The result is NUL-terminated and never longer
   than pName, so pBuf needs room for strlen(pName)+1 characters. */
static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
{
    char* pOut = pBuf;

    for ( const char* pIn = pName; *pIn != '\0'; ++pIn )
    {
        char c = *pIn;

        if ( c >= 0x41 && c <= 0x5A )
        {
            /* A-Z: map to a-z */
            c = (char)( c + 0x20 );
        }
        else if ( !( ( c >= 0x61 && c <= 0x7A ) || ( c >= 0x30 && c <= 0x39 ) ) )
        {
            /* neither a-z nor 0-9: skip it */
            continue;
        }

        *pOut++ = c;
    }

    *pOut = '\0';
}
82 :
83 : /* ----------------------------------------------------------------------- */
84 :
/* Prefix test: returns true when every character of pMatchStr equals the
   character at the same position in pCompStr, i.e. pCompStr starts with
   pMatchStr. An empty pMatchStr therefore always matches. */
static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
{
    /* Only the end of pMatchStr needs to be tested: the terminating 0 of a
       shorter pCompStr differs from the pending character of pMatchStr, so
       the comparison fails before pCompStr could be overrun. */
    while ( *pMatchStr != '\0' && *pCompStr == *pMatchStr )
    {
        ++pCompStr;
        ++pMatchStr;
    }

    return *pMatchStr == '\0';
}
101 :
102 : /* ======================================================================= */
103 :
104 : struct ImplStrCharsetDef
105 : {
106 : const char* mpCharsetStr;
107 : rtl_TextEncoding meTextEncoding;
108 : };
109 :
110 : struct ImplStrFirstPartCharsetDef
111 : {
112 : const char* mpCharsetStr;
113 : const ImplStrCharsetDef* mpSecondPartTab;
114 : };
115 :
116 : /* ======================================================================= */
117 :
118 104304 : sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
119 : {
120 : const ImplTextEncodingData* pData;
121 :
122 104304 : pData = Impl_getTextEncodingData( eTextEncoding );
123 104304 : if ( !pData )
124 : {
125 : /* HACK: For not implemented encoding, because not all
126 : calls handle the errors */
127 91 : if ( pEncInfo->StructSize < 5 )
128 1 : return false;
129 90 : pEncInfo->MinimumCharSize = 1;
130 :
131 90 : if ( pEncInfo->StructSize < 6 )
132 1 : return true;
133 89 : pEncInfo->MaximumCharSize = 1;
134 :
135 89 : if ( pEncInfo->StructSize < 7 )
136 1 : return true;
137 88 : pEncInfo->AverageCharSize = 1;
138 :
139 88 : if ( pEncInfo->StructSize < 12 )
140 1 : return true;
141 87 : pEncInfo->Flags = 0;
142 :
143 87 : return false;
144 : }
145 :
146 104213 : if ( pEncInfo->StructSize < 5 )
147 1 : return false;
148 104212 : pEncInfo->MinimumCharSize = pData->mnMinCharSize;
149 :
150 104212 : if ( pEncInfo->StructSize < 6 )
151 1 : return true;
152 104211 : pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
153 :
154 104211 : if ( pEncInfo->StructSize < 7 )
155 1 : return true;
156 104210 : pEncInfo->AverageCharSize = pData->mnAveCharSize;
157 :
158 104210 : if ( pEncInfo->StructSize < 12 )
159 1 : return true;
160 104209 : pEncInfo->Flags = pData->mnInfoFlags;
161 :
162 104209 : return true;
163 : }
164 :
165 : /* ======================================================================= */
166 :
167 4251 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
168 : {
169 : rtl_TextEncoding eTextEncoding;
170 :
171 4251 : switch ( nWinCharset )
172 : {
173 2215 : case 0: eTextEncoding = RTL_TEXTENCODING_MS_1252; break; /* ANSI_CHARSET */
174 377 : case 2: eTextEncoding = RTL_TEXTENCODING_SYMBOL; break; /* SYMBOL_CHARSET */
175 1 : case 77: eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
176 120 : case 128: eTextEncoding = RTL_TEXTENCODING_MS_932; break; /* SHIFTJIS_CHARSET */
177 3 : case 129: eTextEncoding = RTL_TEXTENCODING_MS_949; break; /* HANGEUL_CHARSET */
178 2 : case 130: eTextEncoding = RTL_TEXTENCODING_MS_1361; break; /* JOHAB_CHARSET */
179 109 : case 134: eTextEncoding = RTL_TEXTENCODING_MS_936; break; /* GB2312_CHARSET */
180 5 : case 136: eTextEncoding = RTL_TEXTENCODING_MS_950; break; /* CHINESEBIG5_CHARSET */
181 5 : case 161: eTextEncoding = RTL_TEXTENCODING_MS_1253; break; /* GREEK_CHARSET */
182 4 : case 162: eTextEncoding = RTL_TEXTENCODING_MS_1254; break; /* TURKISH_CHARSET */
183 1 : case 163: eTextEncoding = RTL_TEXTENCODING_MS_1258; break; /* VIETNAMESE_CHARSET !!! */
184 3 : case 177: eTextEncoding = RTL_TEXTENCODING_MS_1255; break; /* HEBREW_CHARSET */
185 1230 : case 178: eTextEncoding = RTL_TEXTENCODING_MS_1256; break; /* ARABIC_CHARSET */
186 3 : case 186: eTextEncoding = RTL_TEXTENCODING_MS_1257; break; /* BALTIC_CHARSET */
187 12 : case 204: eTextEncoding = RTL_TEXTENCODING_MS_1251; break; /* RUSSIAN_CHARSET */
188 2 : case 222: eTextEncoding = RTL_TEXTENCODING_MS_874; break; /* THAI_CHARSET */
189 42 : case 238: eTextEncoding = RTL_TEXTENCODING_MS_1250; break; /* EASTEUROPE_CHARSET */
190 0 : case 255: eTextEncoding = RTL_TEXTENCODING_IBM_850; break; /* OEM_CHARSET */
191 117 : default: eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
192 : };
193 :
194 4251 : return eTextEncoding;
195 : }
196 :
197 : /* ----------------------------------------------------------------------- */
198 :
199 110 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
200 : {
201 : /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
202 : * (Registry and Encoding) Names").
203 : */
204 :
205 : /* All Identifiers in the tables are lower case The function search */
206 : /* for the first matching string in the tables. */
207 : /* Sort order: unique (first 14, than 1), important */
208 :
209 : static ImplStrCharsetDef const aUnixCharsetISOTab[] =
210 : {
211 : { "15", RTL_TEXTENCODING_ISO_8859_15 },
212 : { "14", RTL_TEXTENCODING_ISO_8859_14 },
213 : { "13", RTL_TEXTENCODING_ISO_8859_13 },
214 : { "11", RTL_TEXTENCODING_TIS_620 },
215 : { "10", RTL_TEXTENCODING_ISO_8859_10 },
216 : { "1", RTL_TEXTENCODING_ISO_8859_1 },
217 : { "2", RTL_TEXTENCODING_ISO_8859_2 },
218 : { "3", RTL_TEXTENCODING_ISO_8859_3 },
219 : { "4", RTL_TEXTENCODING_ISO_8859_4 },
220 : { "5", RTL_TEXTENCODING_ISO_8859_5 },
221 : { "6", RTL_TEXTENCODING_ISO_8859_6 },
222 : { "7", RTL_TEXTENCODING_ISO_8859_7 },
223 : { "8", RTL_TEXTENCODING_ISO_8859_8 },
224 : { "9", RTL_TEXTENCODING_ISO_8859_9 },
225 : { NULL, RTL_TEXTENCODING_DONTKNOW }
226 : };
227 :
228 : static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
229 : {
230 : { "fontspecific", RTL_TEXTENCODING_SYMBOL },
231 : { NULL, RTL_TEXTENCODING_DONTKNOW }
232 : };
233 :
234 : static ImplStrCharsetDef const aUnixCharsetMSTab[] =
235 : {
236 : { "1252", RTL_TEXTENCODING_MS_1252 },
237 : { "1250", RTL_TEXTENCODING_MS_1250 },
238 : { "1251", RTL_TEXTENCODING_MS_1251 },
239 : { "1253", RTL_TEXTENCODING_MS_1253 },
240 : { "1254", RTL_TEXTENCODING_MS_1254 },
241 : { "1255", RTL_TEXTENCODING_MS_1255 },
242 : { "1256", RTL_TEXTENCODING_MS_1256 },
243 : { "1257", RTL_TEXTENCODING_MS_1257 },
244 : { "1258", RTL_TEXTENCODING_MS_1258 },
245 : { "932", RTL_TEXTENCODING_MS_932 },
246 : { "936", RTL_TEXTENCODING_MS_936 },
247 : { "949", RTL_TEXTENCODING_MS_949 },
248 : { "950", RTL_TEXTENCODING_MS_950 },
249 : { "1361", RTL_TEXTENCODING_MS_1361 },
250 : { "cp1252", RTL_TEXTENCODING_MS_1252 },
251 : { "cp1250", RTL_TEXTENCODING_MS_1250 },
252 : { "cp1251", RTL_TEXTENCODING_MS_1251 },
253 : { "cp1253", RTL_TEXTENCODING_MS_1253 },
254 : { "cp1254", RTL_TEXTENCODING_MS_1254 },
255 : { "cp1255", RTL_TEXTENCODING_MS_1255 },
256 : { "cp1256", RTL_TEXTENCODING_MS_1256 },
257 : { "cp1257", RTL_TEXTENCODING_MS_1257 },
258 : { "cp1258", RTL_TEXTENCODING_MS_1258 },
259 : { "cp932", RTL_TEXTENCODING_MS_932 },
260 : { "cp936", RTL_TEXTENCODING_MS_936 },
261 : { "cp949", RTL_TEXTENCODING_MS_949 },
262 : { "cp950", RTL_TEXTENCODING_MS_950 },
263 : { "cp1361", RTL_TEXTENCODING_MS_1361 },
264 : { NULL, RTL_TEXTENCODING_DONTKNOW }
265 : };
266 :
267 : static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
268 : {
269 : { "437", RTL_TEXTENCODING_IBM_437 },
270 : { "850", RTL_TEXTENCODING_IBM_850 },
271 : { "860", RTL_TEXTENCODING_IBM_860 },
272 : { "861", RTL_TEXTENCODING_IBM_861 },
273 : { "863", RTL_TEXTENCODING_IBM_863 },
274 : { "865", RTL_TEXTENCODING_IBM_865 },
275 : { "737", RTL_TEXTENCODING_IBM_737 },
276 : { "775", RTL_TEXTENCODING_IBM_775 },
277 : { "852", RTL_TEXTENCODING_IBM_852 },
278 : { "855", RTL_TEXTENCODING_IBM_855 },
279 : { "857", RTL_TEXTENCODING_IBM_857 },
280 : { "862", RTL_TEXTENCODING_IBM_862 },
281 : { "864", RTL_TEXTENCODING_IBM_864 },
282 : { "866", RTL_TEXTENCODING_IBM_866 },
283 : { "869", RTL_TEXTENCODING_IBM_869 },
284 : { "874", RTL_TEXTENCODING_MS_874 },
285 : { "1004", RTL_TEXTENCODING_MS_1252 },
286 : { "65400", RTL_TEXTENCODING_SYMBOL },
287 : { NULL, RTL_TEXTENCODING_DONTKNOW }
288 : };
289 :
290 : static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
291 : {
292 : { "r", RTL_TEXTENCODING_KOI8_R },
293 : { "u", RTL_TEXTENCODING_KOI8_U },
294 : { NULL, RTL_TEXTENCODING_DONTKNOW }
295 : };
296 :
297 : static ImplStrCharsetDef aUnixCharsetJISX0208Tab[] =
298 : {
299 : { NULL, RTL_TEXTENCODING_JIS_X_0208 }
300 : };
301 :
302 : static ImplStrCharsetDef aUnixCharsetJISX0201Tab[] =
303 : {
304 : { NULL, RTL_TEXTENCODING_JIS_X_0201 }
305 : };
306 :
307 : static ImplStrCharsetDef aUnixCharsetJISX0212Tab[] =
308 : {
309 : { NULL, RTL_TEXTENCODING_JIS_X_0212 }
310 : };
311 :
312 : static ImplStrCharsetDef aUnixCharsetGBTab[] =
313 : {
314 : { NULL, RTL_TEXTENCODING_GB_2312 }
315 : };
316 :
317 : static ImplStrCharsetDef aUnixCharsetGBKTab[] =
318 : {
319 : { NULL, RTL_TEXTENCODING_GBK }
320 : };
321 :
322 : static ImplStrCharsetDef aUnixCharsetBIG5Tab[] =
323 : {
324 : { NULL, RTL_TEXTENCODING_BIG5 }
325 : };
326 :
327 : static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
328 : {
329 : { NULL, RTL_TEXTENCODING_EUC_KR }
330 : };
331 :
332 : static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
333 : {
334 : { NULL, RTL_TEXTENCODING_MS_1361 }
335 : };
336 :
337 : static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
338 : {
339 : { NULL, RTL_TEXTENCODING_UNICODE }
340 : };
341 :
342 : static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
343 : {
344 : /* Currently every Unicode Encoding is for us Unicode */
345 : /* { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
346 : { NULL, RTL_TEXTENCODING_UNICODE }
347 : };
348 :
349 : static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
350 : {
351 : { NULL, RTL_TEXTENCODING_SYMBOL }
352 : };
353 :
354 : /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
355 : rev=1.1.1.1>: */
356 : static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
357 : {
358 : { "0", RTL_TEXTENCODING_TIS_620 },
359 : { "2529", RTL_TEXTENCODING_TIS_620 },
360 : { "2533", RTL_TEXTENCODING_TIS_620 },
361 : { NULL, RTL_TEXTENCODING_DONTKNOW }
362 : };
363 : static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
364 : {
365 : { "1", RTL_TEXTENCODING_TIS_620 },
366 : { NULL, RTL_TEXTENCODING_DONTKNOW }
367 : };
368 : static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
369 : {
370 : { "0", RTL_TEXTENCODING_TIS_620 },
371 : { "1", RTL_TEXTENCODING_TIS_620 },
372 : { NULL, RTL_TEXTENCODING_DONTKNOW }
373 : };
374 :
375 : static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
376 : {
377 : { "iso8859", aUnixCharsetISOTab },
378 : { "adobe", aUnixCharsetADOBETab },
379 : { "ansi", aUnixCharsetMSTab },
380 : { "microsoft", aUnixCharsetMSTab },
381 : { "ibm", aUnixCharsetIBMTab },
382 : { "koi8", aUnixCharsetKOI8Tab },
383 : { "jisx0208", aUnixCharsetJISX0208Tab },
384 : { "jisx0208.1983", aUnixCharsetJISX0208Tab },
385 : { "jisx0201", aUnixCharsetJISX0201Tab },
386 : { "jisx0201.1976", aUnixCharsetJISX0201Tab },
387 : { "jisx0212", aUnixCharsetJISX0212Tab },
388 : { "jisx0212.1990", aUnixCharsetJISX0212Tab },
389 : { "gb2312", aUnixCharsetGBTab },
390 : { "gbk", aUnixCharsetGBKTab },
391 : { "big5", aUnixCharsetBIG5Tab },
392 : { "iso10646", aUnixCharsetISO10646Tab },
393 : /* { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
394 : { "sunolcursor", aUnixCharsetSymbolTab },
395 : { "sunolglyph", aUnixCharsetSymbolTab },
396 : { "iso10646", aUnixCharsetUNICODETab },
397 : { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
398 : { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
399 : { "tis620.2529", aUnixCharsetTIS6202529Tab },
400 : { "tis620.2533", aUnixCharsetTIS6202533Tab },
401 : { "tis620", aUnixCharsetTIS620Tab },
402 : /* { "sunudcja.1997", }, */
403 : /* { "sunudcko.1997", }, */
404 : /* { "sunudczh.1997", }, */
405 : /* { "sunudczhtw.1997", }, */
406 : { NULL, NULL }
407 : };
408 :
409 110 : rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
410 : char* pBuf;
411 : char* pTempBuf;
412 110 : sal_uInt32 nBufLen = strlen( pUnixCharset )+1;
413 : const char* pFirstPart;
414 : const char* pSecondPart;
415 :
416 : /* Alloc Buffer and map to lower case */
417 110 : pBuf = new char[nBufLen];
418 110 : Impl_toAsciiLower( pUnixCharset, pBuf );
419 :
420 : /* Search FirstPart */
421 110 : pFirstPart = pBuf;
422 110 : pSecondPart = NULL;
423 110 : pTempBuf = pBuf;
424 905 : while ( *pTempBuf )
425 : {
426 791 : if ( *pTempBuf == '-' )
427 : {
428 106 : *pTempBuf = '\0';
429 106 : pSecondPart = pTempBuf+1;
430 106 : break;
431 : }
432 :
433 685 : pTempBuf++;
434 : }
435 :
436 : /* Parttrenner gefunden */
437 110 : if ( pSecondPart )
438 : {
439 : /* Search for the part tab */
440 106 : const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
441 1057 : while ( pFirstPartData->mpCharsetStr )
442 : {
443 926 : if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
444 : {
445 : /* Search for the charset in the second part tab */
446 81 : const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
447 674 : while ( pData->mpCharsetStr )
448 : {
449 586 : if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
450 : {
451 74 : eEncoding = pData->meTextEncoding;
452 74 : break;
453 : }
454 :
455 512 : pData++;
456 : }
457 :
458 : /* use default encoding for first part */
459 81 : eEncoding = pData->meTextEncoding;
460 81 : break;
461 : }
462 :
463 845 : pFirstPartData++;
464 : }
465 : }
466 :
467 110 : delete[] pBuf;
468 :
469 110 : return eEncoding;
470 : }
471 :
472 : /* ----------------------------------------------------------------------- */
473 :
474 282 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
475 : {
476 : /* All Identifiers are in lower case and contain only alphanumeric */
477 : /* characters. The function search for the first equal string in */
478 : /* the table. In this table are only the most used mime types. */
479 : /* Sort order: important */
480 : static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
481 : {
482 : { "usascii", RTL_TEXTENCODING_ASCII_US },
483 : { "utf8", RTL_TEXTENCODING_UTF8 },
484 : { "utf7", RTL_TEXTENCODING_UTF7 },
485 : { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
486 : { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
487 : { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
488 : { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
489 : { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
490 : { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
491 : { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
492 : { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
493 : { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
494 : { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
495 : { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
496 : { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
497 : { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
498 : { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
499 : { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
500 : { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
501 : { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
502 : { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
503 : { "eucjp", RTL_TEXTENCODING_EUC_JP },
504 : { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
505 : { "mskanji", RTL_TEXTENCODING_MS_932 },
506 : { "gb2312", RTL_TEXTENCODING_GB_2312 },
507 : { "cngb", RTL_TEXTENCODING_GB_2312 },
508 : { "big5", RTL_TEXTENCODING_BIG5 },
509 : { "cnbig5", RTL_TEXTENCODING_BIG5 },
510 : { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
511 : { "euckr", RTL_TEXTENCODING_EUC_KR },
512 : { "koi8r", RTL_TEXTENCODING_KOI8_R },
513 : { "windows1252", RTL_TEXTENCODING_MS_1252 },
514 : { "windows1250", RTL_TEXTENCODING_MS_1250 },
515 : { "windows1251", RTL_TEXTENCODING_MS_1251 },
516 : { "windows1253", RTL_TEXTENCODING_MS_1253 },
517 : { "windows1254", RTL_TEXTENCODING_MS_1254 },
518 : { "windows1255", RTL_TEXTENCODING_MS_1255 },
519 : { "windows1256", RTL_TEXTENCODING_MS_1256 },
520 : { "windows1257", RTL_TEXTENCODING_MS_1257 },
521 : { "windows1258", RTL_TEXTENCODING_MS_1258 },
522 : { NULL, RTL_TEXTENCODING_DONTKNOW }
523 : };
524 :
525 : /* All Identifiers are in lower case and contain only alphanumeric */
526 : /* characters. The function search for the first matching string in */
527 : /* the table. */
528 : /* Sort order: unique (first iso885914, than iso88591), important */
529 : static ImplStrCharsetDef const aMimeCharsetTab[] =
530 : {
531 : { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
532 : { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
533 : { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
534 : { "iso88591win", RTL_TEXTENCODING_MS_1252 },
535 : { "iso88592win", RTL_TEXTENCODING_MS_1250 },
536 : { "iso88599win", RTL_TEXTENCODING_MS_1254 },
537 : { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
538 : { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
539 : { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
540 : { "iso885911", RTL_TEXTENCODING_TIS_620 },
541 : /* This is no official MIME character set name, but it might be in
542 : use in Thailand. */
543 : { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
544 : { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
545 : { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
546 : { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
547 : { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
548 : { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
549 : { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
550 : { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
551 : { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
552 : { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
553 : { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
554 : { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
555 : { "l1", RTL_TEXTENCODING_ISO_8859_1 },
556 : { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
557 : { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
558 : { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
559 : { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
560 : { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
561 : { "l2", RTL_TEXTENCODING_ISO_8859_2 },
562 : { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
563 : { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
564 : { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
565 : { "l3", RTL_TEXTENCODING_ISO_8859_3 },
566 : { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
567 : { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
568 : { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
569 : { "l4", RTL_TEXTENCODING_ISO_8859_4 },
570 : { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
571 : { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
572 : { "cyrillicasian", RTL_TEXTENCODING_PT154 },
573 : { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
574 : { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
575 : { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
576 : { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
577 : { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
578 : { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
579 : { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
580 : { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
581 : { "greek", RTL_TEXTENCODING_ISO_8859_7 },
582 : { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
583 : { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
584 : { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
585 : { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
586 : { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
587 : { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
588 : { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
589 : { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
590 : { "l5", RTL_TEXTENCODING_ISO_8859_9 },
591 : { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
592 : { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
593 : { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
594 : { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
595 : { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
596 : { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
597 : { "iso10646us", RTL_TEXTENCODING_ASCII_US },
598 : { "iso646irv", RTL_TEXTENCODING_ASCII_US },
599 : { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
600 : { "ibm437", RTL_TEXTENCODING_IBM_437 },
601 : { "cp437", RTL_TEXTENCODING_IBM_437 },
602 : { "437", RTL_TEXTENCODING_IBM_437 },
603 : { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
604 : { "ansix34", RTL_TEXTENCODING_ASCII_US },
605 : { "ibm367", RTL_TEXTENCODING_ASCII_US },
606 : { "cp367", RTL_TEXTENCODING_ASCII_US },
607 : { "csascii", RTL_TEXTENCODING_ASCII_US },
608 : { "ibm775", RTL_TEXTENCODING_IBM_775 },
609 : { "cp775", RTL_TEXTENCODING_IBM_775 },
610 : { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
611 : { "ibm850", RTL_TEXTENCODING_IBM_850 },
612 : { "cp850", RTL_TEXTENCODING_IBM_850 },
613 : { "850", RTL_TEXTENCODING_IBM_850 },
614 : { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
615 : /* { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
616 : /* { "cp851", RTL_TEXTENCODING_IBM_851 }, */
617 : /* { "851", RTL_TEXTENCODING_IBM_851 }, */
618 : /* { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
619 : { "ibm852", RTL_TEXTENCODING_IBM_852 },
620 : { "cp852", RTL_TEXTENCODING_IBM_852 },
621 : { "852", RTL_TEXTENCODING_IBM_852 },
622 : { "cspcp852", RTL_TEXTENCODING_IBM_852 },
623 : { "ibm855", RTL_TEXTENCODING_IBM_855 },
624 : { "cp855", RTL_TEXTENCODING_IBM_855 },
625 : { "855", RTL_TEXTENCODING_IBM_855 },
626 : { "csibm855", RTL_TEXTENCODING_IBM_855 },
627 : { "ibm857", RTL_TEXTENCODING_IBM_857 },
628 : { "cp857", RTL_TEXTENCODING_IBM_857 },
629 : { "857", RTL_TEXTENCODING_IBM_857 },
630 : { "csibm857", RTL_TEXTENCODING_IBM_857 },
631 : { "ibm860", RTL_TEXTENCODING_IBM_860 },
632 : { "cp860", RTL_TEXTENCODING_IBM_860 },
633 : { "860", RTL_TEXTENCODING_IBM_860 },
634 : { "csibm860", RTL_TEXTENCODING_IBM_860 },
635 : { "ibm861", RTL_TEXTENCODING_IBM_861 },
636 : { "cp861", RTL_TEXTENCODING_IBM_861 },
637 : { "861", RTL_TEXTENCODING_IBM_861 },
638 : { "csis", RTL_TEXTENCODING_IBM_861 },
639 : { "csibm861", RTL_TEXTENCODING_IBM_861 },
640 : { "ibm862", RTL_TEXTENCODING_IBM_862 },
641 : { "cp862", RTL_TEXTENCODING_IBM_862 },
642 : { "862", RTL_TEXTENCODING_IBM_862 },
643 : { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
644 : { "ibm863", RTL_TEXTENCODING_IBM_863 },
645 : { "cp863", RTL_TEXTENCODING_IBM_863 },
646 : { "863", RTL_TEXTENCODING_IBM_863 },
647 : { "csibm863", RTL_TEXTENCODING_IBM_863 },
648 : { "ibm864", RTL_TEXTENCODING_IBM_864 },
649 : { "cp864", RTL_TEXTENCODING_IBM_864 },
650 : { "864", RTL_TEXTENCODING_IBM_864 },
651 : { "csibm864", RTL_TEXTENCODING_IBM_864 },
652 : { "ibm865", RTL_TEXTENCODING_IBM_865 },
653 : { "cp865", RTL_TEXTENCODING_IBM_865 },
654 : { "865", RTL_TEXTENCODING_IBM_865 },
655 : { "csibm865", RTL_TEXTENCODING_IBM_865 },
656 : { "ibm866", RTL_TEXTENCODING_IBM_866 },
657 : { "cp866", RTL_TEXTENCODING_IBM_866 },
658 : { "866", RTL_TEXTENCODING_IBM_866 },
659 : { "csibm866", RTL_TEXTENCODING_IBM_866 },
660 : /* { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
661 : /* { "cp868", RTL_TEXTENCODING_IBM_868 }, */
662 : /* { "cpar", RTL_TEXTENCODING_IBM_868 }, */
663 : /* { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
664 : { "ibm869", RTL_TEXTENCODING_IBM_869 },
665 : { "cp869", RTL_TEXTENCODING_IBM_869 },
666 : { "869", RTL_TEXTENCODING_IBM_869 },
667 : { "cpgr", RTL_TEXTENCODING_IBM_869 },
668 : { "csibm869", RTL_TEXTENCODING_IBM_869 },
669 : { "ibm869", RTL_TEXTENCODING_IBM_869 },
670 : { "cp869", RTL_TEXTENCODING_IBM_869 },
671 : { "869", RTL_TEXTENCODING_IBM_869 },
672 : { "cpgr", RTL_TEXTENCODING_IBM_869 },
673 : { "csibm869", RTL_TEXTENCODING_IBM_869 },
674 : { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
675 : { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
676 : { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
677 : { "mskanji", RTL_TEXTENCODING_MS_932 },
678 : { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
679 : { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
680 : { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
681 : { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
682 : { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
683 : { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
684 : { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
685 : { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
686 : { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
687 : { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
688 : { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
689 : { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
690 : { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
691 : { "isoir6", RTL_TEXTENCODING_ASCII_US },
692 : { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
693 : { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
694 : { "ascii", RTL_TEXTENCODING_ASCII_US },
695 : { "us", RTL_TEXTENCODING_ASCII_US },
696 : { "gb180302000", RTL_TEXTENCODING_GB_18030 },
697 : /* This is no actual MIME character set name, it is only in here
698 : for backwards compatibility (before "GB18030" was officially
699 : registered with IANA, this code contained some guesses of what
700 : would become official names for GB18030). */
701 : { "gb18030", RTL_TEXTENCODING_GB_18030 },
702 : { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
703 : { "tis620", RTL_TEXTENCODING_TIS_620 },
704 : { "gbk", RTL_TEXTENCODING_GBK },
705 : { "cp936", RTL_TEXTENCODING_GBK },
706 : { "ms936", RTL_TEXTENCODING_GBK },
707 : { "windows936", RTL_TEXTENCODING_GBK },
708 : { "cp874", RTL_TEXTENCODING_MS_874 },
709 : /* This is no official MIME character set name, but it might be in
710 : use in Thailand. */
711 : { "ms874", RTL_TEXTENCODING_MS_874 },
712 : /* This is no official MIME character set name, but it might be in
713 : use in Thailand. */
714 : { "windows874", RTL_TEXTENCODING_MS_874 },
715 : /* This is no official MIME character set name, but it might be in
716 : use in Thailand. */
717 : { "koi8u", RTL_TEXTENCODING_KOI8_U },
718 : { "cpis", RTL_TEXTENCODING_IBM_861 },
719 : { "ksc56011987", RTL_TEXTENCODING_MS_949 },
720 : { "isoir149", RTL_TEXTENCODING_MS_949 },
721 : { "ksc56011989", RTL_TEXTENCODING_MS_949 },
722 : { "ksc5601", RTL_TEXTENCODING_MS_949 },
723 : { "korean", RTL_TEXTENCODING_MS_949 },
724 : { "csksc56011987", RTL_TEXTENCODING_MS_949 },
725 : /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
726 : this character set identifier seems to be prominently used by MS
727 : to stand for KS C 5601 plus MS-949 extensions */
728 : { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
729 : { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
730 : { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
731 : { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
732 : { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
733 : { "ptcp154", RTL_TEXTENCODING_PT154 },
734 : { "csptcp154", RTL_TEXTENCODING_PT154 },
735 : { "pt154", RTL_TEXTENCODING_PT154 },
736 : { "cp154", RTL_TEXTENCODING_PT154 },
737 : { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
738 : /* This is not an official MIME character set name, but is in use by
739 : various windows APIs. */
740 : { NULL, RTL_TEXTENCODING_DONTKNOW }
741 : };
742 :
743 282 : rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
744 : char* pBuf;
745 282 : const ImplStrCharsetDef* pData = aVIPMimeCharsetTab;
746 282 : sal_uInt32 nBufLen = strlen( pMimeCharset )+1;
747 :
748 : /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
749 282 : pBuf = new char[nBufLen];
750 282 : Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf );
751 :
752 : /* Search for equal in the VIP table */
753 7777 : while ( pData->mpCharsetStr )
754 : {
755 7414 : if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 )
756 : {
757 201 : eEncoding = pData->meTextEncoding;
758 201 : break;
759 : }
760 :
761 7213 : pData++;
762 : }
763 :
764 : /* Search for matching in the mime table */
765 282 : if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
766 : {
767 81 : pData = aMimeCharsetTab;
768 9967 : while ( pData->mpCharsetStr )
769 : {
770 9885 : if ( Impl_matchString( pBuf, pData->mpCharsetStr ) )
771 : {
772 80 : eEncoding = pData->meTextEncoding;
773 80 : break;
774 : }
775 :
776 9805 : pData++;
777 : }
778 : }
779 :
780 282 : delete[] pBuf;
781 :
782 282 : return eEncoding;
783 : }
784 :
785 : /* ======================================================================= */
786 :
787 529 : sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
788 : {
789 529 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
790 529 : if ( pData )
791 290 : return pData->mnBestWindowsCharset;
792 : else
793 239 : return 1;
794 : }
795 :
796 : /* ----------------------------------------------------------------------- */
797 :
798 80 : const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
799 : {
800 80 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
801 80 : if ( pData )
802 80 : return (char const *) pData->mpBestUnixCharset;
803 0 : else if( eTextEncoding == RTL_TEXTENCODING_UNICODE )
804 0 : return (char const *) "iso10646-1";
805 : else
806 0 : return 0;
807 : }
808 :
809 : /* ----------------------------------------------------------------------- */
810 :
811 184 : char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
812 : nEncoding)
813 : {
814 184 : ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
815 : return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
816 184 : p->mpBestMimeCharset : NULL;
817 : }
818 :
819 101 : const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
820 : {
821 101 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
822 101 : if ( pData )
823 101 : return (char const *) pData->mpBestMimeCharset;
824 : else
825 0 : return 0;
826 : }
827 :
/* The following two functions are based on
   <http://www.sharmahd.com/tm/codepages.html>,
   <http://msdn.microsoft.com/workshop/author/dhtml/reference/charsets/charset4.asp>,
   and <http://www.iana.org/assignments/character-sets>.
 */
832 :
833 : rtl_TextEncoding SAL_CALL
834 417 : rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
835 : {
836 417 : switch (nCodePage)
837 : {
838 1 : case 437: return RTL_TEXTENCODING_IBM_437;
839 1 : case 708: return RTL_TEXTENCODING_ISO_8859_6;
840 1 : case 737: return RTL_TEXTENCODING_IBM_737;
841 1 : case 775: return RTL_TEXTENCODING_IBM_775;
842 1 : case 850: return RTL_TEXTENCODING_IBM_850;
843 1 : case 852: return RTL_TEXTENCODING_IBM_852;
844 1 : case 855: return RTL_TEXTENCODING_IBM_855;
845 1 : case 857: return RTL_TEXTENCODING_IBM_857;
846 1 : case 860: return RTL_TEXTENCODING_IBM_860;
847 1 : case 861: return RTL_TEXTENCODING_IBM_861;
848 1 : case 862: return RTL_TEXTENCODING_IBM_862;
849 1 : case 863: return RTL_TEXTENCODING_IBM_863;
850 1 : case 864: return RTL_TEXTENCODING_IBM_864;
851 1 : case 865: return RTL_TEXTENCODING_IBM_865;
852 1 : case 866: return RTL_TEXTENCODING_IBM_866;
853 1 : case 869: return RTL_TEXTENCODING_IBM_869;
854 2 : case 874: return RTL_TEXTENCODING_MS_874;
855 23 : case 932: return RTL_TEXTENCODING_MS_932;
856 9 : case 936: return RTL_TEXTENCODING_MS_936;
857 1 : case 949: return RTL_TEXTENCODING_MS_949;
858 4 : case 950: return RTL_TEXTENCODING_MS_950;
859 29 : case 1250: return RTL_TEXTENCODING_MS_1250;
860 33 : case 1251: return RTL_TEXTENCODING_MS_1251;
861 130 : case 1252: return RTL_TEXTENCODING_MS_1252;
862 19 : case 1253: return RTL_TEXTENCODING_MS_1253;
863 19 : case 1254: return RTL_TEXTENCODING_MS_1254;
864 15 : case 1255: return RTL_TEXTENCODING_MS_1255;
865 13 : case 1256: return RTL_TEXTENCODING_MS_1256;
866 19 : case 1257: return RTL_TEXTENCODING_MS_1257;
867 15 : case 1258: return RTL_TEXTENCODING_MS_1258;
868 1 : case 1361: return RTL_TEXTENCODING_MS_1361;
869 1 : case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
870 1 : case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
871 1 : case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
872 1 : case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
873 1 : case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
874 1 : case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
875 1 : case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
876 1 : case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
877 1 : case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
878 1 : case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
879 1 : case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
880 1 : case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
881 1 : case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
882 1 : case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
883 1 : case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
884 1 : case 20127: return RTL_TEXTENCODING_ASCII_US;
885 1 : case 20866: return RTL_TEXTENCODING_KOI8_R;
886 1 : case 21866: return RTL_TEXTENCODING_KOI8_U;
887 1 : case 28591: return RTL_TEXTENCODING_ISO_8859_1;
888 1 : case 28592: return RTL_TEXTENCODING_ISO_8859_2;
889 1 : case 28593: return RTL_TEXTENCODING_ISO_8859_3;
890 1 : case 28594: return RTL_TEXTENCODING_ISO_8859_4;
891 1 : case 28595: return RTL_TEXTENCODING_ISO_8859_5;
892 1 : case 28596: return RTL_TEXTENCODING_ISO_8859_6;
893 1 : case 28597: return RTL_TEXTENCODING_ISO_8859_7;
894 1 : case 28598: return RTL_TEXTENCODING_ISO_8859_8;
895 1 : case 28599: return RTL_TEXTENCODING_ISO_8859_9;
896 1 : case 28605: return RTL_TEXTENCODING_ISO_8859_15;
897 1 : case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
898 1 : case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
899 1 : case 51932: return RTL_TEXTENCODING_EUC_JP;
900 1 : case 51936: return RTL_TEXTENCODING_EUC_CN;
901 1 : case 51949: return RTL_TEXTENCODING_EUC_KR;
902 1 : case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
903 1 : case 65000: return RTL_TEXTENCODING_UTF7;
904 30 : case 65001: return RTL_TEXTENCODING_UTF8;
905 4 : default: return RTL_TEXTENCODING_DONTKNOW;
906 : }
907 : }
908 :
909 : sal_uInt32 SAL_CALL
910 69 : rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
911 : {
912 69 : switch (nEncoding)
913 : {
914 1 : case RTL_TEXTENCODING_IBM_437: return 437;
915 : /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
916 1 : case RTL_TEXTENCODING_IBM_737: return 737;
917 1 : case RTL_TEXTENCODING_IBM_775: return 775;
918 1 : case RTL_TEXTENCODING_IBM_850: return 850;
919 1 : case RTL_TEXTENCODING_IBM_852: return 852;
920 1 : case RTL_TEXTENCODING_IBM_855: return 855;
921 1 : case RTL_TEXTENCODING_IBM_857: return 857;
922 1 : case RTL_TEXTENCODING_IBM_860: return 860;
923 1 : case RTL_TEXTENCODING_IBM_861: return 861;
924 1 : case RTL_TEXTENCODING_IBM_862: return 862;
925 1 : case RTL_TEXTENCODING_IBM_863: return 863;
926 1 : case RTL_TEXTENCODING_IBM_864: return 864;
927 1 : case RTL_TEXTENCODING_IBM_865: return 865;
928 1 : case RTL_TEXTENCODING_IBM_866: return 866;
929 1 : case RTL_TEXTENCODING_IBM_869: return 869;
930 1 : case RTL_TEXTENCODING_MS_874: return 874;
931 1 : case RTL_TEXTENCODING_MS_932: return 932;
932 1 : case RTL_TEXTENCODING_MS_936: return 936;
933 1 : case RTL_TEXTENCODING_MS_949: return 949;
934 1 : case RTL_TEXTENCODING_MS_950: return 950;
935 1 : case RTL_TEXTENCODING_MS_1250: return 1250;
936 1 : case RTL_TEXTENCODING_MS_1251: return 1251;
937 1 : case RTL_TEXTENCODING_MS_1252: return 1252;
938 1 : case RTL_TEXTENCODING_MS_1253: return 1253;
939 1 : case RTL_TEXTENCODING_MS_1254: return 1254;
940 1 : case RTL_TEXTENCODING_MS_1255: return 1255;
941 1 : case RTL_TEXTENCODING_MS_1256: return 1256;
942 1 : case RTL_TEXTENCODING_MS_1257: return 1257;
943 1 : case RTL_TEXTENCODING_MS_1258: return 1258;
944 1 : case RTL_TEXTENCODING_MS_1361: return 1361;
945 1 : case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
946 1 : case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
947 1 : case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
948 1 : case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
949 1 : case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
950 1 : case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
951 1 : case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
952 1 : case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
953 1 : case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
954 1 : case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
955 1 : case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
956 1 : case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
957 1 : case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
958 1 : case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
959 1 : case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
960 1 : case RTL_TEXTENCODING_ASCII_US: return 20127;
961 1 : case RTL_TEXTENCODING_KOI8_R: return 20866;
962 1 : case RTL_TEXTENCODING_KOI8_U: return 21866;
963 1 : case RTL_TEXTENCODING_ISO_8859_1: return 28591;
964 1 : case RTL_TEXTENCODING_ISO_8859_2: return 28592;
965 1 : case RTL_TEXTENCODING_ISO_8859_3: return 28593;
966 1 : case RTL_TEXTENCODING_ISO_8859_4: return 28594;
967 1 : case RTL_TEXTENCODING_ISO_8859_5: return 28595;
968 1 : case RTL_TEXTENCODING_ISO_8859_6: return 28596;
969 1 : case RTL_TEXTENCODING_ISO_8859_7: return 28597;
970 1 : case RTL_TEXTENCODING_ISO_8859_8: return 28598;
971 1 : case RTL_TEXTENCODING_ISO_8859_9: return 28599;
972 1 : case RTL_TEXTENCODING_ISO_8859_15: return 28605;
973 1 : case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
974 1 : case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
975 1 : case RTL_TEXTENCODING_EUC_JP: return 51932;
976 1 : case RTL_TEXTENCODING_EUC_CN: return 51936;
977 1 : case RTL_TEXTENCODING_EUC_KR: return 51949;
978 1 : case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
979 1 : case RTL_TEXTENCODING_UTF7: return 65000;
980 1 : case RTL_TEXTENCODING_UTF8: return 65001;
981 3 : default: return 0;
982 : }
983 : }
984 :
985 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|