Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include <cstddef>
23 : #include <cstring>
24 :
25 : #include "rtl/tencinfo.h"
26 :
27 : #include "gettextencodingdata.hxx"
28 : #include "tenchelp.hxx"
29 : #include <boost/scoped_array.hpp>
30 :
31 2222 : sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
32 : {
33 : return
34 : nEncoding > RTL_TEXTENCODING_DONTKNOW
35 2222 : && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM
36 4444 : && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // always update this!
37 : }
38 :
39 : /* ======================================================================= */
40 :
/* Copies the NUL-terminated string pName into pBuf, replacing the ASCII
   letters A-Z (0x41..0x5A) by a-z; every other character is copied as is.
   pBuf must have room for strlen(pName)+1 characters; it is always
   NUL-terminated on return. */
static void Impl_toAsciiLower( const char* pName, char* pBuf )
{
    for ( ; *pName; ++pName, ++pBuf )
    {
        const char c = *pName;

        /* A-Z -> a-z, anything else unchanged */
        *pBuf = ( (c >= 0x41) && (c <= 0x5A) ) ? (char)(c + 0x20) : c;
    }

    *pBuf = '\0';
}
57 :
58 : /* ----------------------------------------------------------------------- */
59 :
/* Writes a normalised copy of the NUL-terminated string pName to pBuf:
   ASCII letters A-Z become a-z, a-z and 0-9 are kept, and every other
   character is dropped. pBuf must have room for strlen(pName)+1
   characters; it is always NUL-terminated on return. */
static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
{
    char* pOut = pBuf;

    for ( const char* pIn = pName; *pIn; ++pIn )
    {
        const char c = *pIn;

        /* A-Z: store the lower case letter */
        if ( (c >= 0x41) && (c <= 0x5A) )
        {
            *pOut++ = (char)(c + 0x20);
            continue;
        }

        /* 0-9, a-z: store unchanged; everything else is skipped */
        if ( ((c >= 0x30) && (c <= 0x39)) ||
             ((c >= 0x61) && (c <= 0x7A)) )
        {
            *pOut++ = c;
        }
    }

    *pOut = '\0';
}
83 :
84 : /* ----------------------------------------------------------------------- */
85 :
/* Returns true if pMatchStr is a prefix of pCompStr, i.e. every character
   of pMatchStr equals the character at the same position in pCompStr.
   An empty pMatchStr therefore matches any pCompStr. */
static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
{
    /* Only the end of pMatchStr needs to be tested: if pCompStr ends     */
    /* first, its terminating 0 differs from the current pMatchStr        */
    /* character and the comparison below fails.                          */
    for ( ; *pMatchStr; ++pCompStr, ++pMatchStr )
    {
        if ( *pCompStr != *pMatchStr )
            return false;
    }

    return true;
}
102 :
103 : /* ======================================================================= */
104 :
/* One row of a charset lookup table. The tables in this file are terminated
   by a row whose mpCharsetStr is NULL; the lookup code reads that row's
   meTextEncoding as the result when no earlier row matched. */
struct ImplStrCharsetDef
{
    const char* mpCharsetStr;           /* lower-case charset name (or name part); NULL in the terminator row */
    rtl_TextEncoding meTextEncoding;    /* encoding selected by this row */
};
110 :
/* One row of the first-level table used for "<first>-<second>" charset
   names: the first name part and the table used to resolve the second
   part. The table is terminated by a row whose mpCharsetStr is NULL. */
struct ImplStrFirstPartCharsetDef
{
    const char* mpCharsetStr;                   /* lower-case first name part; NULL in the terminator row */
    const ImplStrCharsetDef* mpSecondPartTab;   /* table searched with the second name part */
};
116 :
117 : /* ======================================================================= */
118 :
119 1100634 : sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
120 : {
121 : const ImplTextEncodingData* pData;
122 :
123 1100634 : pData = Impl_getTextEncodingData( eTextEncoding );
124 1100634 : if ( !pData )
125 : {
126 : /* HACK: For not implemented encoding, because not all
127 : calls handle the errors */
128 1394 : if ( pEncInfo->StructSize < 5 )
129 2 : return false;
130 1392 : pEncInfo->MinimumCharSize = 1;
131 :
132 1392 : if ( pEncInfo->StructSize < 6 )
133 2 : return true;
134 1390 : pEncInfo->MaximumCharSize = 1;
135 :
136 1390 : if ( pEncInfo->StructSize < 7 )
137 2 : return true;
138 1388 : pEncInfo->AverageCharSize = 1;
139 :
140 1388 : if ( pEncInfo->StructSize < 12 )
141 2 : return true;
142 1386 : pEncInfo->Flags = 0;
143 :
144 1386 : return false;
145 : }
146 :
147 1099240 : if ( pEncInfo->StructSize < 5 )
148 2 : return false;
149 1099238 : pEncInfo->MinimumCharSize = pData->mnMinCharSize;
150 :
151 1099238 : if ( pEncInfo->StructSize < 6 )
152 2 : return true;
153 1099236 : pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
154 :
155 1099236 : if ( pEncInfo->StructSize < 7 )
156 2 : return true;
157 1099234 : pEncInfo->AverageCharSize = pData->mnAveCharSize;
158 :
159 1099234 : if ( pEncInfo->StructSize < 12 )
160 2 : return true;
161 1099232 : pEncInfo->Flags = pData->mnInfoFlags;
162 :
163 1099232 : return true;
164 : }
165 :
166 : /* ======================================================================= */
167 :
168 45368 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
169 : {
170 : rtl_TextEncoding eTextEncoding;
171 :
172 45368 : switch ( nWinCharset )
173 : {
174 26362 : case 0: eTextEncoding = RTL_TEXTENCODING_MS_1252; break; /* ANSI_CHARSET */
175 4314 : case 2: eTextEncoding = RTL_TEXTENCODING_SYMBOL; break; /* SYMBOL_CHARSET */
176 16 : case 77: eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
177 856 : case 128: eTextEncoding = RTL_TEXTENCODING_MS_932; break; /* SHIFTJIS_CHARSET */
178 12 : case 129: eTextEncoding = RTL_TEXTENCODING_MS_949; break; /* HANGEUL_CHARSET */
179 0 : case 130: eTextEncoding = RTL_TEXTENCODING_MS_1361; break; /* JOHAB_CHARSET */
180 732 : case 134: eTextEncoding = RTL_TEXTENCODING_MS_936; break; /* GB2312_CHARSET */
181 14 : case 136: eTextEncoding = RTL_TEXTENCODING_MS_950; break; /* CHINESEBIG5_CHARSET */
182 1576 : case 161: eTextEncoding = RTL_TEXTENCODING_MS_1253; break; /* GREEK_CHARSET */
183 0 : case 162: eTextEncoding = RTL_TEXTENCODING_MS_1254; break; /* TURKISH_CHARSET */
184 0 : case 163: eTextEncoding = RTL_TEXTENCODING_MS_1258; break; /* VIETNAMESE_CHARSET !!! */
185 18 : case 177: eTextEncoding = RTL_TEXTENCODING_MS_1255; break; /* HEBREW_CHARSET */
186 2460 : case 178: eTextEncoding = RTL_TEXTENCODING_MS_1256; break; /* ARABIC_CHARSET */
187 2 : case 186: eTextEncoding = RTL_TEXTENCODING_MS_1257; break; /* BALTIC_CHARSET */
188 64 : case 204: eTextEncoding = RTL_TEXTENCODING_MS_1251; break; /* RUSSIAN_CHARSET */
189 8 : case 222: eTextEncoding = RTL_TEXTENCODING_MS_874; break; /* THAI_CHARSET */
190 3266 : case 238: eTextEncoding = RTL_TEXTENCODING_MS_1250; break; /* EASTEUROPE_CHARSET */
191 0 : case 255: eTextEncoding = RTL_TEXTENCODING_IBM_850; break; /* OEM_CHARSET */
192 5668 : default: eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
193 : }
194 :
195 45368 : return eTextEncoding;
196 : }
197 :
198 : /* ----------------------------------------------------------------------- */
199 :
200 220 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
201 : {
202 : /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
203 : * (Registry and Encoding) Names").
204 : */
205 :
206 : /* All Identifiers in the tables are lower case The function search */
207 : /* for the first matching string in the tables. */
208 : /* Sort order: unique (first 14, than 1), important */
209 :
210 : static ImplStrCharsetDef const aUnixCharsetISOTab[] =
211 : {
212 : { "15", RTL_TEXTENCODING_ISO_8859_15 },
213 : { "14", RTL_TEXTENCODING_ISO_8859_14 },
214 : { "13", RTL_TEXTENCODING_ISO_8859_13 },
215 : { "11", RTL_TEXTENCODING_TIS_620 },
216 : { "10", RTL_TEXTENCODING_ISO_8859_10 },
217 : { "1", RTL_TEXTENCODING_ISO_8859_1 },
218 : { "2", RTL_TEXTENCODING_ISO_8859_2 },
219 : { "3", RTL_TEXTENCODING_ISO_8859_3 },
220 : { "4", RTL_TEXTENCODING_ISO_8859_4 },
221 : { "5", RTL_TEXTENCODING_ISO_8859_5 },
222 : { "6", RTL_TEXTENCODING_ISO_8859_6 },
223 : { "7", RTL_TEXTENCODING_ISO_8859_7 },
224 : { "8", RTL_TEXTENCODING_ISO_8859_8 },
225 : { "9", RTL_TEXTENCODING_ISO_8859_9 },
226 : { NULL, RTL_TEXTENCODING_DONTKNOW }
227 : };
228 :
229 : static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
230 : {
231 : { "fontspecific", RTL_TEXTENCODING_SYMBOL },
232 : { NULL, RTL_TEXTENCODING_DONTKNOW }
233 : };
234 :
235 : static ImplStrCharsetDef const aUnixCharsetMSTab[] =
236 : {
237 : { "1252", RTL_TEXTENCODING_MS_1252 },
238 : { "1250", RTL_TEXTENCODING_MS_1250 },
239 : { "1251", RTL_TEXTENCODING_MS_1251 },
240 : { "1253", RTL_TEXTENCODING_MS_1253 },
241 : { "1254", RTL_TEXTENCODING_MS_1254 },
242 : { "1255", RTL_TEXTENCODING_MS_1255 },
243 : { "1256", RTL_TEXTENCODING_MS_1256 },
244 : { "1257", RTL_TEXTENCODING_MS_1257 },
245 : { "1258", RTL_TEXTENCODING_MS_1258 },
246 : { "932", RTL_TEXTENCODING_MS_932 },
247 : { "936", RTL_TEXTENCODING_MS_936 },
248 : { "949", RTL_TEXTENCODING_MS_949 },
249 : { "950", RTL_TEXTENCODING_MS_950 },
250 : { "1361", RTL_TEXTENCODING_MS_1361 },
251 : { "cp1252", RTL_TEXTENCODING_MS_1252 },
252 : { "cp1250", RTL_TEXTENCODING_MS_1250 },
253 : { "cp1251", RTL_TEXTENCODING_MS_1251 },
254 : { "cp1253", RTL_TEXTENCODING_MS_1253 },
255 : { "cp1254", RTL_TEXTENCODING_MS_1254 },
256 : { "cp1255", RTL_TEXTENCODING_MS_1255 },
257 : { "cp1256", RTL_TEXTENCODING_MS_1256 },
258 : { "cp1257", RTL_TEXTENCODING_MS_1257 },
259 : { "cp1258", RTL_TEXTENCODING_MS_1258 },
260 : { "cp932", RTL_TEXTENCODING_MS_932 },
261 : { "cp936", RTL_TEXTENCODING_MS_936 },
262 : { "cp949", RTL_TEXTENCODING_MS_949 },
263 : { "cp950", RTL_TEXTENCODING_MS_950 },
264 : { "cp1361", RTL_TEXTENCODING_MS_1361 },
265 : { NULL, RTL_TEXTENCODING_DONTKNOW }
266 : };
267 :
268 : static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
269 : {
270 : { "437", RTL_TEXTENCODING_IBM_437 },
271 : { "850", RTL_TEXTENCODING_IBM_850 },
272 : { "860", RTL_TEXTENCODING_IBM_860 },
273 : { "861", RTL_TEXTENCODING_IBM_861 },
274 : { "863", RTL_TEXTENCODING_IBM_863 },
275 : { "865", RTL_TEXTENCODING_IBM_865 },
276 : { "737", RTL_TEXTENCODING_IBM_737 },
277 : { "775", RTL_TEXTENCODING_IBM_775 },
278 : { "852", RTL_TEXTENCODING_IBM_852 },
279 : { "855", RTL_TEXTENCODING_IBM_855 },
280 : { "857", RTL_TEXTENCODING_IBM_857 },
281 : { "862", RTL_TEXTENCODING_IBM_862 },
282 : { "864", RTL_TEXTENCODING_IBM_864 },
283 : { "866", RTL_TEXTENCODING_IBM_866 },
284 : { "869", RTL_TEXTENCODING_IBM_869 },
285 : { "874", RTL_TEXTENCODING_MS_874 },
286 : { "1004", RTL_TEXTENCODING_MS_1252 },
287 : { "65400", RTL_TEXTENCODING_SYMBOL },
288 : { NULL, RTL_TEXTENCODING_DONTKNOW }
289 : };
290 :
291 : static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
292 : {
293 : { "r", RTL_TEXTENCODING_KOI8_R },
294 : { "u", RTL_TEXTENCODING_KOI8_U },
295 : { NULL, RTL_TEXTENCODING_DONTKNOW }
296 : };
297 :
298 : static ImplStrCharsetDef const aUnixCharsetJISX0208Tab[] =
299 : {
300 : { NULL, RTL_TEXTENCODING_JIS_X_0208 }
301 : };
302 :
303 : static ImplStrCharsetDef const aUnixCharsetJISX0201Tab[] =
304 : {
305 : { NULL, RTL_TEXTENCODING_JIS_X_0201 }
306 : };
307 :
308 : static ImplStrCharsetDef const aUnixCharsetJISX0212Tab[] =
309 : {
310 : { NULL, RTL_TEXTENCODING_JIS_X_0212 }
311 : };
312 :
313 : static ImplStrCharsetDef const aUnixCharsetGBTab[] =
314 : {
315 : { NULL, RTL_TEXTENCODING_GB_2312 }
316 : };
317 :
318 : static ImplStrCharsetDef const aUnixCharsetGBKTab[] =
319 : {
320 : { NULL, RTL_TEXTENCODING_GBK }
321 : };
322 :
323 : static ImplStrCharsetDef const aUnixCharsetBIG5Tab[] =
324 : {
325 : { NULL, RTL_TEXTENCODING_BIG5 }
326 : };
327 :
328 : static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
329 : {
330 : { NULL, RTL_TEXTENCODING_EUC_KR }
331 : };
332 :
333 : static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
334 : {
335 : { NULL, RTL_TEXTENCODING_MS_1361 }
336 : };
337 :
338 : static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
339 : {
340 : { NULL, RTL_TEXTENCODING_UNICODE }
341 : };
342 :
343 : static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
344 : {
345 : /* Currently every Unicode Encoding is for us Unicode */
346 : /* { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
347 : { NULL, RTL_TEXTENCODING_UNICODE }
348 : };
349 :
350 : static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
351 : {
352 : { NULL, RTL_TEXTENCODING_SYMBOL }
353 : };
354 :
355 : /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
356 : rev=1.1.1.1>: */
357 : static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
358 : {
359 : { "0", RTL_TEXTENCODING_TIS_620 },
360 : { "2529", RTL_TEXTENCODING_TIS_620 },
361 : { "2533", RTL_TEXTENCODING_TIS_620 },
362 : { NULL, RTL_TEXTENCODING_DONTKNOW }
363 : };
364 : static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
365 : {
366 : { "1", RTL_TEXTENCODING_TIS_620 },
367 : { NULL, RTL_TEXTENCODING_DONTKNOW }
368 : };
369 : static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
370 : {
371 : { "0", RTL_TEXTENCODING_TIS_620 },
372 : { "1", RTL_TEXTENCODING_TIS_620 },
373 : { NULL, RTL_TEXTENCODING_DONTKNOW }
374 : };
375 :
376 : static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
377 : {
378 : { "iso8859", aUnixCharsetISOTab },
379 : { "adobe", aUnixCharsetADOBETab },
380 : { "ansi", aUnixCharsetMSTab },
381 : { "microsoft", aUnixCharsetMSTab },
382 : { "ibm", aUnixCharsetIBMTab },
383 : { "koi8", aUnixCharsetKOI8Tab },
384 : { "jisx0208", aUnixCharsetJISX0208Tab },
385 : { "jisx0208.1983", aUnixCharsetJISX0208Tab },
386 : { "jisx0201", aUnixCharsetJISX0201Tab },
387 : { "jisx0201.1976", aUnixCharsetJISX0201Tab },
388 : { "jisx0212", aUnixCharsetJISX0212Tab },
389 : { "jisx0212.1990", aUnixCharsetJISX0212Tab },
390 : { "gb2312", aUnixCharsetGBTab },
391 : { "gbk", aUnixCharsetGBKTab },
392 : { "big5", aUnixCharsetBIG5Tab },
393 : { "iso10646", aUnixCharsetISO10646Tab },
394 : /* { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
395 : { "sunolcursor", aUnixCharsetSymbolTab },
396 : { "sunolglyph", aUnixCharsetSymbolTab },
397 : { "iso10646", aUnixCharsetUNICODETab },
398 : { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
399 : { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
400 : { "tis620.2529", aUnixCharsetTIS6202529Tab },
401 : { "tis620.2533", aUnixCharsetTIS6202533Tab },
402 : { "tis620", aUnixCharsetTIS620Tab },
403 : /* { "sunudcja.1997", }, */
404 : /* { "sunudcko.1997", }, */
405 : /* { "sunudczh.1997", }, */
406 : /* { "sunudczhtw.1997", }, */
407 : { NULL, NULL }
408 : };
409 :
410 220 : rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
411 : char* pTempBuf;
412 220 : sal_uInt32 nBufLen = strlen( pUnixCharset )+1;
413 : const char* pFirstPart;
414 : const char* pSecondPart;
415 :
416 : /* Alloc Buffer and map to lower case */
417 220 : boost::scoped_array<char> pBuf(new char[nBufLen]);
418 220 : Impl_toAsciiLower( pUnixCharset, pBuf.get() );
419 :
420 : /* Search FirstPart */
421 220 : pFirstPart = pBuf.get();
422 220 : pSecondPart = NULL;
423 220 : pTempBuf = pBuf.get();
424 1810 : while ( *pTempBuf )
425 : {
426 1582 : if ( *pTempBuf == '-' )
427 : {
428 212 : *pTempBuf = '\0';
429 212 : pSecondPart = pTempBuf+1;
430 212 : break;
431 : }
432 :
433 1370 : pTempBuf++;
434 : }
435 :
436 : /* Parttrenner gefunden */
437 220 : if ( pSecondPart )
438 : {
439 : /* Search for the part tab */
440 212 : const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
441 2114 : while ( pFirstPartData->mpCharsetStr )
442 : {
443 1852 : if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
444 : {
445 : /* Search for the charset in the second part tab */
446 162 : const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
447 1348 : while ( pData->mpCharsetStr )
448 : {
449 1172 : if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
450 : {
451 148 : break;
452 : }
453 :
454 1024 : pData++;
455 : }
456 :
457 : /* use default encoding for first part */
458 162 : eEncoding = pData->meTextEncoding;
459 162 : break;
460 : }
461 :
462 1690 : pFirstPartData++;
463 : }
464 : }
465 :
466 220 : return eEncoding;
467 : }
468 :
469 : /* ----------------------------------------------------------------------- */
470 :
471 6465 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
472 : {
473 : /* All Identifiers are in lower case and contain only alphanumeric */
474 : /* characters. The function search for the first equal string in */
475 : /* the table. In this table are only the most used mime types. */
476 : /* Sort order: important */
477 : static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
478 : {
479 : { "usascii", RTL_TEXTENCODING_ASCII_US },
480 : { "utf8", RTL_TEXTENCODING_UTF8 },
481 : { "utf7", RTL_TEXTENCODING_UTF7 },
482 : { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
483 : { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
484 : { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
485 : { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
486 : { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
487 : { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
488 : { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
489 : { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
490 : { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
491 : { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
492 : { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
493 : { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
494 : { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
495 : { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
496 : { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
497 : { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
498 : { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
499 : { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
500 : { "eucjp", RTL_TEXTENCODING_EUC_JP },
501 : { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
502 : { "mskanji", RTL_TEXTENCODING_MS_932 },
503 : { "gb2312", RTL_TEXTENCODING_GB_2312 },
504 : { "cngb", RTL_TEXTENCODING_GB_2312 },
505 : { "big5", RTL_TEXTENCODING_BIG5 },
506 : { "cnbig5", RTL_TEXTENCODING_BIG5 },
507 : { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
508 : { "euckr", RTL_TEXTENCODING_EUC_KR },
509 : { "koi8r", RTL_TEXTENCODING_KOI8_R },
510 : { "windows1252", RTL_TEXTENCODING_MS_1252 },
511 : { "windows1250", RTL_TEXTENCODING_MS_1250 },
512 : { "windows1251", RTL_TEXTENCODING_MS_1251 },
513 : { "windows1253", RTL_TEXTENCODING_MS_1253 },
514 : { "windows1254", RTL_TEXTENCODING_MS_1254 },
515 : { "windows1255", RTL_TEXTENCODING_MS_1255 },
516 : { "windows1256", RTL_TEXTENCODING_MS_1256 },
517 : { "windows1257", RTL_TEXTENCODING_MS_1257 },
518 : { "windows1258", RTL_TEXTENCODING_MS_1258 },
519 : { NULL, RTL_TEXTENCODING_DONTKNOW }
520 : };
521 :
522 : /* All Identifiers are in lower case and contain only alphanumeric */
523 : /* characters. The function search for the first matching string in */
524 : /* the table. */
525 : /* Sort order: unique (first iso885914, than iso88591), important */
526 : static ImplStrCharsetDef const aMimeCharsetTab[] =
527 : {
528 : { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
529 : { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
530 : { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
531 : { "iso88591win", RTL_TEXTENCODING_MS_1252 },
532 : { "iso88592win", RTL_TEXTENCODING_MS_1250 },
533 : { "iso88599win", RTL_TEXTENCODING_MS_1254 },
534 : { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
535 : { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
536 : { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
537 : { "iso885911", RTL_TEXTENCODING_TIS_620 },
538 : /* This is no official MIME character set name, but it might be in
539 : use in Thailand. */
540 : { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
541 : { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
542 : { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
543 : { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
544 : { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
545 : { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
546 : { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
547 : { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
548 : { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
549 : { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
550 : { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
551 : { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
552 : { "l1", RTL_TEXTENCODING_ISO_8859_1 },
553 : { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
554 : { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
555 : { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
556 : { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
557 : { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
558 : { "l2", RTL_TEXTENCODING_ISO_8859_2 },
559 : { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
560 : { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
561 : { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
562 : { "l3", RTL_TEXTENCODING_ISO_8859_3 },
563 : { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
564 : { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
565 : { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
566 : { "l4", RTL_TEXTENCODING_ISO_8859_4 },
567 : { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
568 : { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
569 : { "cyrillicasian", RTL_TEXTENCODING_PT154 },
570 : { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
571 : { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
572 : { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
573 : { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
574 : { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
575 : { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
576 : { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
577 : { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
578 : { "greek", RTL_TEXTENCODING_ISO_8859_7 },
579 : { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
580 : { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
581 : { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
582 : { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
583 : { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
584 : { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
585 : { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
586 : { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
587 : { "l5", RTL_TEXTENCODING_ISO_8859_9 },
588 : { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
589 : { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
590 : { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
591 : { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
592 : { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
593 : { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
594 : { "iso10646us", RTL_TEXTENCODING_ASCII_US },
595 : { "iso646irv", RTL_TEXTENCODING_ASCII_US },
596 : { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
597 : { "ibm437", RTL_TEXTENCODING_IBM_437 },
598 : { "cp437", RTL_TEXTENCODING_IBM_437 },
599 : { "437", RTL_TEXTENCODING_IBM_437 },
600 : { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
601 : { "ansix34", RTL_TEXTENCODING_ASCII_US },
602 : { "ibm367", RTL_TEXTENCODING_ASCII_US },
603 : { "cp367", RTL_TEXTENCODING_ASCII_US },
604 : { "csascii", RTL_TEXTENCODING_ASCII_US },
605 : { "ibm775", RTL_TEXTENCODING_IBM_775 },
606 : { "cp775", RTL_TEXTENCODING_IBM_775 },
607 : { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
608 : { "ibm850", RTL_TEXTENCODING_IBM_850 },
609 : { "cp850", RTL_TEXTENCODING_IBM_850 },
610 : { "850", RTL_TEXTENCODING_IBM_850 },
611 : { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
612 : /* { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
613 : /* { "cp851", RTL_TEXTENCODING_IBM_851 }, */
614 : /* { "851", RTL_TEXTENCODING_IBM_851 }, */
615 : /* { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
616 : { "ibm852", RTL_TEXTENCODING_IBM_852 },
617 : { "cp852", RTL_TEXTENCODING_IBM_852 },
618 : { "852", RTL_TEXTENCODING_IBM_852 },
619 : { "cspcp852", RTL_TEXTENCODING_IBM_852 },
620 : { "ibm855", RTL_TEXTENCODING_IBM_855 },
621 : { "cp855", RTL_TEXTENCODING_IBM_855 },
622 : { "855", RTL_TEXTENCODING_IBM_855 },
623 : { "csibm855", RTL_TEXTENCODING_IBM_855 },
624 : { "ibm857", RTL_TEXTENCODING_IBM_857 },
625 : { "cp857", RTL_TEXTENCODING_IBM_857 },
626 : { "857", RTL_TEXTENCODING_IBM_857 },
627 : { "csibm857", RTL_TEXTENCODING_IBM_857 },
628 : { "ibm860", RTL_TEXTENCODING_IBM_860 },
629 : { "cp860", RTL_TEXTENCODING_IBM_860 },
630 : { "860", RTL_TEXTENCODING_IBM_860 },
631 : { "csibm860", RTL_TEXTENCODING_IBM_860 },
632 : { "ibm861", RTL_TEXTENCODING_IBM_861 },
633 : { "cp861", RTL_TEXTENCODING_IBM_861 },
634 : { "861", RTL_TEXTENCODING_IBM_861 },
635 : { "csis", RTL_TEXTENCODING_IBM_861 },
636 : { "csibm861", RTL_TEXTENCODING_IBM_861 },
637 : { "ibm862", RTL_TEXTENCODING_IBM_862 },
638 : { "cp862", RTL_TEXTENCODING_IBM_862 },
639 : { "862", RTL_TEXTENCODING_IBM_862 },
640 : { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
641 : { "ibm863", RTL_TEXTENCODING_IBM_863 },
642 : { "cp863", RTL_TEXTENCODING_IBM_863 },
643 : { "863", RTL_TEXTENCODING_IBM_863 },
644 : { "csibm863", RTL_TEXTENCODING_IBM_863 },
645 : { "ibm864", RTL_TEXTENCODING_IBM_864 },
646 : { "cp864", RTL_TEXTENCODING_IBM_864 },
647 : { "864", RTL_TEXTENCODING_IBM_864 },
648 : { "csibm864", RTL_TEXTENCODING_IBM_864 },
649 : { "ibm865", RTL_TEXTENCODING_IBM_865 },
650 : { "cp865", RTL_TEXTENCODING_IBM_865 },
651 : { "865", RTL_TEXTENCODING_IBM_865 },
652 : { "csibm865", RTL_TEXTENCODING_IBM_865 },
653 : { "ibm866", RTL_TEXTENCODING_IBM_866 },
654 : { "cp866", RTL_TEXTENCODING_IBM_866 },
655 : { "866", RTL_TEXTENCODING_IBM_866 },
656 : { "csibm866", RTL_TEXTENCODING_IBM_866 },
657 : /* { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
658 : /* { "cp868", RTL_TEXTENCODING_IBM_868 }, */
659 : /* { "cpar", RTL_TEXTENCODING_IBM_868 }, */
660 : /* { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
661 : { "ibm869", RTL_TEXTENCODING_IBM_869 },
662 : { "cp869", RTL_TEXTENCODING_IBM_869 },
663 : { "869", RTL_TEXTENCODING_IBM_869 },
664 : { "cpgr", RTL_TEXTENCODING_IBM_869 },
665 : { "csibm869", RTL_TEXTENCODING_IBM_869 },
666 : { "ibm869", RTL_TEXTENCODING_IBM_869 },
667 : { "cp869", RTL_TEXTENCODING_IBM_869 },
668 : { "869", RTL_TEXTENCODING_IBM_869 },
669 : { "cpgr", RTL_TEXTENCODING_IBM_869 },
670 : { "csibm869", RTL_TEXTENCODING_IBM_869 },
671 : { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
672 : { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
673 : { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
674 : { "mskanji", RTL_TEXTENCODING_MS_932 },
675 : { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
676 : { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
677 : { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
678 : { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
679 : { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
680 : { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
681 : { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
682 : { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
683 : { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
684 : { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
685 : { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
686 : { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
687 : { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
688 : { "isoir6", RTL_TEXTENCODING_ASCII_US },
689 : { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
690 : { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
691 : { "ascii", RTL_TEXTENCODING_ASCII_US },
692 : { "us", RTL_TEXTENCODING_ASCII_US },
693 : { "gb180302000", RTL_TEXTENCODING_GB_18030 },
694 : /* This is no actual MIME character set name, it is only in here
695 : for backwards compatibility (before "GB18030" was officially
696 : registered with IANA, this code contained some guesses of what
697 : would become official names for GB18030). */
698 : { "gb18030", RTL_TEXTENCODING_GB_18030 },
699 : { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
700 : { "tis620", RTL_TEXTENCODING_TIS_620 },
701 : { "gbk", RTL_TEXTENCODING_GBK },
702 : { "cp936", RTL_TEXTENCODING_GBK },
703 : { "ms936", RTL_TEXTENCODING_GBK },
704 : { "windows936", RTL_TEXTENCODING_GBK },
705 : { "cp874", RTL_TEXTENCODING_MS_874 },
706 : /* This is no official MIME character set name, but it might be in
707 : use in Thailand. */
708 : { "ms874", RTL_TEXTENCODING_MS_874 },
709 : /* This is no official MIME character set name, but it might be in
710 : use in Thailand. */
711 : { "windows874", RTL_TEXTENCODING_MS_874 },
712 : /* This is no official MIME character set name, but it might be in
713 : use in Thailand. */
714 : { "koi8u", RTL_TEXTENCODING_KOI8_U },
715 : { "cpis", RTL_TEXTENCODING_IBM_861 },
716 : { "ksc56011987", RTL_TEXTENCODING_MS_949 },
717 : { "isoir149", RTL_TEXTENCODING_MS_949 },
718 : { "ksc56011989", RTL_TEXTENCODING_MS_949 },
719 : { "ksc5601", RTL_TEXTENCODING_MS_949 },
720 : { "korean", RTL_TEXTENCODING_MS_949 },
721 : { "csksc56011987", RTL_TEXTENCODING_MS_949 },
722 : /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
723 : this character set identifier seems to be prominently used by MS
724 : to stand for KS C 5601 plus MS-949 extensions */
725 : { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
726 : { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
727 : { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
728 : { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
729 : { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
730 : { "ptcp154", RTL_TEXTENCODING_PT154 },
731 : { "csptcp154", RTL_TEXTENCODING_PT154 },
732 : { "pt154", RTL_TEXTENCODING_PT154 },
733 : { "cp154", RTL_TEXTENCODING_PT154 },
734 : { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
735 : /* This is not an official MIME character set name, but is in use by
736 : various windows APIs. */
737 : { NULL, RTL_TEXTENCODING_DONTKNOW }
738 : };
739 :
740 6465 : rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
741 6465 : const ImplStrCharsetDef* pData = aVIPMimeCharsetTab;
742 6465 : sal_uInt32 nBufLen = strlen( pMimeCharset )+1;
743 :
744 : /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
745 6465 : boost::scoped_array<char> pBuf(new char[nBufLen]);
746 6465 : Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf.get() );
747 :
748 : /* Search for equal in the VIP table */
749 92231 : while ( pData->mpCharsetStr )
750 : {
751 85604 : if ( strcmp( pBuf.get(), pData->mpCharsetStr ) == 0 )
752 : {
753 6303 : eEncoding = pData->meTextEncoding;
754 6303 : break;
755 : }
756 :
757 79301 : pData++;
758 : }
759 :
760 : /* Search for matching in the mime table */
761 6465 : if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
762 : {
763 162 : pData = aMimeCharsetTab;
764 19934 : while ( pData->mpCharsetStr )
765 : {
766 19770 : if ( Impl_matchString( pBuf.get(), pData->mpCharsetStr ) )
767 : {
768 160 : eEncoding = pData->meTextEncoding;
769 160 : break;
770 : }
771 :
772 19610 : pData++;
773 : }
774 : }
775 :
776 6465 : return eEncoding;
777 : }
778 :
779 : /* ======================================================================= */
780 :
781 35564 : sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
782 : {
783 35564 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
784 35564 : if ( pData )
785 34364 : return pData->mnBestWindowsCharset;
786 : else
787 1200 : return 1;
788 : }
789 :
790 : /* ----------------------------------------------------------------------- */
791 :
792 160 : const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
793 : {
794 160 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
795 160 : if ( pData )
796 160 : return (char const *) pData->mpBestUnixCharset;
797 0 : else if( eTextEncoding == RTL_TEXTENCODING_UNICODE )
798 0 : return (char const *) "iso10646-1";
799 : else
800 0 : return 0;
801 : }
802 :
803 : /* ----------------------------------------------------------------------- */
804 :
805 8080 : char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
806 : nEncoding)
807 : {
808 8080 : ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
809 8056 : return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
810 15034 : p->mpBestMimeCharset : NULL;
811 : }
812 :
813 620 : const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
814 : {
815 620 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
816 620 : if ( pData )
817 620 : return (char const *) pData->mpBestMimeCharset;
818 : else
819 0 : return 0;
820 : }
821 :
822 : /* The following two functions are based on <http://www.sharmahd.com/tm/
823 : codepages.html>, <http://msdn.microsoft.com/workshop/author/dhtml/reference/
824 : charsets/charset4.asp>, and <http://www.iana.org/assignments/character-sets>.
825 : */
826 :
827 : rtl_TextEncoding SAL_CALL
828 3116 : rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
829 : {
830 3116 : switch (nCodePage)
831 : {
832 2 : case 437: return RTL_TEXTENCODING_IBM_437;
833 2 : case 708: return RTL_TEXTENCODING_ISO_8859_6;
834 2 : case 737: return RTL_TEXTENCODING_IBM_737;
835 2 : case 775: return RTL_TEXTENCODING_IBM_775;
836 2 : case 850: return RTL_TEXTENCODING_IBM_850;
837 2 : case 852: return RTL_TEXTENCODING_IBM_852;
838 2 : case 855: return RTL_TEXTENCODING_IBM_855;
839 2 : case 857: return RTL_TEXTENCODING_IBM_857;
840 2 : case 860: return RTL_TEXTENCODING_IBM_860;
841 2 : case 861: return RTL_TEXTENCODING_IBM_861;
842 2 : case 862: return RTL_TEXTENCODING_IBM_862;
843 2 : case 863: return RTL_TEXTENCODING_IBM_863;
844 2 : case 864: return RTL_TEXTENCODING_IBM_864;
845 2 : case 865: return RTL_TEXTENCODING_IBM_865;
846 2 : case 866: return RTL_TEXTENCODING_IBM_866;
847 2 : case 869: return RTL_TEXTENCODING_IBM_869;
848 4 : case 874: return RTL_TEXTENCODING_MS_874;
849 122 : case 932: return RTL_TEXTENCODING_MS_932;
850 28 : case 936: return RTL_TEXTENCODING_MS_936;
851 2 : case 949: return RTL_TEXTENCODING_MS_949;
852 10 : case 950: return RTL_TEXTENCODING_MS_950;
853 184 : case 1250: return RTL_TEXTENCODING_MS_1250;
854 152 : case 1251: return RTL_TEXTENCODING_MS_1251;
855 1614 : case 1252: return RTL_TEXTENCODING_MS_1252;
856 72 : case 1253: return RTL_TEXTENCODING_MS_1253;
857 74 : case 1254: return RTL_TEXTENCODING_MS_1254;
858 52 : case 1255: return RTL_TEXTENCODING_MS_1255;
859 48 : case 1256: return RTL_TEXTENCODING_MS_1256;
860 72 : case 1257: return RTL_TEXTENCODING_MS_1257;
861 60 : case 1258: return RTL_TEXTENCODING_MS_1258;
862 2 : case 1361: return RTL_TEXTENCODING_MS_1361;
863 2 : case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
864 2 : case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
865 2 : case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
866 2 : case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
867 2 : case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
868 2 : case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
869 2 : case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
870 2 : case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
871 2 : case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
872 2 : case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
873 2 : case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
874 2 : case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
875 2 : case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
876 2 : case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
877 2 : case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
878 2 : case 20127: return RTL_TEXTENCODING_ASCII_US;
879 2 : case 20866: return RTL_TEXTENCODING_KOI8_R;
880 2 : case 21866: return RTL_TEXTENCODING_KOI8_U;
881 2 : case 28591: return RTL_TEXTENCODING_ISO_8859_1;
882 2 : case 28592: return RTL_TEXTENCODING_ISO_8859_2;
883 2 : case 28593: return RTL_TEXTENCODING_ISO_8859_3;
884 2 : case 28594: return RTL_TEXTENCODING_ISO_8859_4;
885 2 : case 28595: return RTL_TEXTENCODING_ISO_8859_5;
886 2 : case 28596: return RTL_TEXTENCODING_ISO_8859_6;
887 2 : case 28597: return RTL_TEXTENCODING_ISO_8859_7;
888 2 : case 28598: return RTL_TEXTENCODING_ISO_8859_8;
889 2 : case 28599: return RTL_TEXTENCODING_ISO_8859_9;
890 2 : case 28605: return RTL_TEXTENCODING_ISO_8859_15;
891 2 : case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
892 2 : case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
893 2 : case 51932: return RTL_TEXTENCODING_EUC_JP;
894 2 : case 51936: return RTL_TEXTENCODING_EUC_CN;
895 2 : case 51949: return RTL_TEXTENCODING_EUC_KR;
896 2 : case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
897 2 : case 65000: return RTL_TEXTENCODING_UTF7;
898 322 : case 65001: return RTL_TEXTENCODING_UTF8;
899 196 : default: return RTL_TEXTENCODING_DONTKNOW;
900 : }
901 : }
902 :
903 : sal_uInt32 SAL_CALL
904 318 : rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
905 : {
906 318 : switch (nEncoding)
907 : {
908 2 : case RTL_TEXTENCODING_IBM_437: return 437;
909 : /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
910 2 : case RTL_TEXTENCODING_IBM_737: return 737;
911 2 : case RTL_TEXTENCODING_IBM_775: return 775;
912 2 : case RTL_TEXTENCODING_IBM_850: return 850;
913 2 : case RTL_TEXTENCODING_IBM_852: return 852;
914 2 : case RTL_TEXTENCODING_IBM_855: return 855;
915 2 : case RTL_TEXTENCODING_IBM_857: return 857;
916 2 : case RTL_TEXTENCODING_IBM_860: return 860;
917 2 : case RTL_TEXTENCODING_IBM_861: return 861;
918 2 : case RTL_TEXTENCODING_IBM_862: return 862;
919 2 : case RTL_TEXTENCODING_IBM_863: return 863;
920 2 : case RTL_TEXTENCODING_IBM_864: return 864;
921 2 : case RTL_TEXTENCODING_IBM_865: return 865;
922 2 : case RTL_TEXTENCODING_IBM_866: return 866;
923 2 : case RTL_TEXTENCODING_IBM_869: return 869;
924 2 : case RTL_TEXTENCODING_MS_874: return 874;
925 2 : case RTL_TEXTENCODING_MS_932: return 932;
926 2 : case RTL_TEXTENCODING_MS_936: return 936;
927 2 : case RTL_TEXTENCODING_MS_949: return 949;
928 2 : case RTL_TEXTENCODING_MS_950: return 950;
929 2 : case RTL_TEXTENCODING_MS_1250: return 1250;
930 2 : case RTL_TEXTENCODING_MS_1251: return 1251;
931 2 : case RTL_TEXTENCODING_MS_1252: return 1252;
932 2 : case RTL_TEXTENCODING_MS_1253: return 1253;
933 2 : case RTL_TEXTENCODING_MS_1254: return 1254;
934 2 : case RTL_TEXTENCODING_MS_1255: return 1255;
935 2 : case RTL_TEXTENCODING_MS_1256: return 1256;
936 2 : case RTL_TEXTENCODING_MS_1257: return 1257;
937 2 : case RTL_TEXTENCODING_MS_1258: return 1258;
938 2 : case RTL_TEXTENCODING_MS_1361: return 1361;
939 2 : case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
940 2 : case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
941 2 : case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
942 2 : case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
943 2 : case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
944 2 : case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
945 2 : case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
946 2 : case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
947 2 : case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
948 2 : case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
949 2 : case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
950 2 : case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
951 2 : case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
952 2 : case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
953 2 : case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
954 2 : case RTL_TEXTENCODING_ASCII_US: return 20127;
955 2 : case RTL_TEXTENCODING_KOI8_R: return 20866;
956 2 : case RTL_TEXTENCODING_KOI8_U: return 21866;
957 2 : case RTL_TEXTENCODING_ISO_8859_1: return 28591;
958 2 : case RTL_TEXTENCODING_ISO_8859_2: return 28592;
959 2 : case RTL_TEXTENCODING_ISO_8859_3: return 28593;
960 2 : case RTL_TEXTENCODING_ISO_8859_4: return 28594;
961 2 : case RTL_TEXTENCODING_ISO_8859_5: return 28595;
962 2 : case RTL_TEXTENCODING_ISO_8859_6: return 28596;
963 2 : case RTL_TEXTENCODING_ISO_8859_7: return 28597;
964 2 : case RTL_TEXTENCODING_ISO_8859_8: return 28598;
965 2 : case RTL_TEXTENCODING_ISO_8859_9: return 28599;
966 2 : case RTL_TEXTENCODING_ISO_8859_15: return 28605;
967 2 : case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
968 2 : case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
969 2 : case RTL_TEXTENCODING_EUC_JP: return 51932;
970 2 : case RTL_TEXTENCODING_EUC_CN: return 51936;
971 2 : case RTL_TEXTENCODING_EUC_KR: return 51949;
972 2 : case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
973 2 : case RTL_TEXTENCODING_UTF7: return 65000;
974 182 : case RTL_TEXTENCODING_UTF8: return 65001;
975 6 : default: return 0;
976 : }
977 : }
978 :
979 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|