Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include <cstddef>
23 : #include <cstring>
24 :
25 : #include "rtl/tencinfo.h"
26 :
27 : #include "gettextencodingdata.hxx"
28 : #include "tenchelp.hxx"
29 :
30 0 : sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
31 : {
32 : return
33 : nEncoding > RTL_TEXTENCODING_DONTKNOW
34 0 : && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM
35 0 : && nEncoding <= RTL_TEXTENCODING_ADOBE_DINGBATS; // always update this!
36 : }
37 :
38 : /* ======================================================================= */
39 :
/** Copy a NUL-terminated string while folding ASCII upper-case letters to
    lower case.

    @param pName  NUL-terminated input string.
    @param pBuf   output buffer; receives as many characters as pName has,
                  plus the terminating NUL, so it must be at least
                  strlen(pName)+1 bytes large.
 */
static void Impl_toAsciiLower( const char* pName, char* pBuf )
{
    for ( ; *pName != '\0'; ++pName, ++pBuf )
    {
        const char c = *pName;

        /* 0x41..0x5A is A-Z; adding 0x20 yields the matching a-z */
        *pBuf = ( c >= 0x41 && c <= 0x5A ) ? (char)( c + 0x20 ) : c;
    }

    *pBuf = '\0';
}
56 :
57 : /* ----------------------------------------------------------------------- */
58 :
/** Copy a NUL-terminated string, keeping only ASCII letters and digits and
    folding upper-case letters to lower case.

    @param pName  NUL-terminated input string.
    @param pBuf   output buffer; never receives more characters than pName
                  has, plus the terminating NUL, so strlen(pName)+1 bytes
                  are always sufficient.
 */
static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
{
    char* pOut = pBuf;

    for ( const char* pIn = pName; *pIn != '\0'; ++pIn )
    {
        const char c = *pIn;

        if ( c >= 0x41 && c <= 0x5A )
        {
            /* A-Z: store the lower-case counterpart */
            *pOut++ = (char)( c + 0x20 );
        }
        else if ( ( c >= 0x61 && c <= 0x7A ) || ( c >= 0x30 && c <= 0x39 ) )
        {
            /* a-z and 0-9 are copied unchanged */
            *pOut++ = c;
        }
        /* every other character is dropped */
    }

    *pOut = '\0';
}
82 :
83 : /* ----------------------------------------------------------------------- */
84 :
/** Prefix test: check whether pCompStr begins with pMatchStr.

    Every character of pMatchStr must equal the character at the same
    position in pCompStr; characters of pCompStr beyond the length of
    pMatchStr are ignored. An empty pMatchStr therefore matches anything.

    @param pCompStr   NUL-terminated string that is examined.
    @param pMatchStr  NUL-terminated prefix to look for.
    @return true if pMatchStr is a prefix of pCompStr, false otherwise.
 */
static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
{
    /* Only the end of pMatchStr needs to be checked: when pCompStr is the */
    /* shorter string, its terminating NUL differs from the corresponding  */
    /* pMatchStr character and the comparison fails before reading further */
    for ( int i = 0; pMatchStr[i] != '\0'; ++i )
    {
        if ( pCompStr[i] != pMatchStr[i] )
            return false;
    }

    return true;
}
101 :
102 : /* ======================================================================= */
103 :
104 : struct ImplStrCharsetDef
105 : {
106 : const char* mpCharsetStr;
107 : rtl_TextEncoding meTextEncoding;
108 : };
109 :
110 : struct ImplStrFirstPartCharsetDef
111 : {
112 : const char* mpCharsetStr;
113 : const ImplStrCharsetDef* mpSecondPartTab;
114 : };
115 :
116 : /* ======================================================================= */
117 :
118 255037 : sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
119 : {
120 : const ImplTextEncodingData* pData;
121 :
122 255037 : pData = Impl_getTextEncodingData( eTextEncoding );
123 255037 : if ( !pData )
124 : {
125 : /* HACK: For not implemented encoding, because not all
126 : calls handle the errors */
127 0 : if ( pEncInfo->StructSize < 5 )
128 0 : return false;
129 0 : pEncInfo->MinimumCharSize = 1;
130 :
131 0 : if ( pEncInfo->StructSize < 6 )
132 0 : return true;
133 0 : pEncInfo->MaximumCharSize = 1;
134 :
135 0 : if ( pEncInfo->StructSize < 7 )
136 0 : return true;
137 0 : pEncInfo->AverageCharSize = 1;
138 :
139 0 : if ( pEncInfo->StructSize < 12 )
140 0 : return true;
141 0 : pEncInfo->Flags = 0;
142 :
143 0 : return false;
144 : }
145 :
146 255037 : if ( pEncInfo->StructSize < 5 )
147 0 : return false;
148 255037 : pEncInfo->MinimumCharSize = pData->mnMinCharSize;
149 :
150 255037 : if ( pEncInfo->StructSize < 6 )
151 0 : return true;
152 255037 : pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
153 :
154 255037 : if ( pEncInfo->StructSize < 7 )
155 0 : return true;
156 255037 : pEncInfo->AverageCharSize = pData->mnAveCharSize;
157 :
158 255037 : if ( pEncInfo->StructSize < 12 )
159 0 : return true;
160 255037 : pEncInfo->Flags = pData->mnInfoFlags;
161 :
162 255037 : return true;
163 : }
164 :
165 : /* ======================================================================= */
166 :
167 0 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
168 : {
169 : rtl_TextEncoding eTextEncoding;
170 :
171 0 : switch ( nWinCharset )
172 : {
173 0 : case 0: eTextEncoding = RTL_TEXTENCODING_MS_1252; break; /* ANSI_CHARSET */
174 0 : case 2: eTextEncoding = RTL_TEXTENCODING_SYMBOL; break; /* SYMBOL_CHARSET */
175 0 : case 77: eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
176 0 : case 128: eTextEncoding = RTL_TEXTENCODING_MS_932; break; /* SHIFTJIS_CHARSET */
177 0 : case 129: eTextEncoding = RTL_TEXTENCODING_MS_949; break; /* HANGEUL_CHARSET */
178 0 : case 130: eTextEncoding = RTL_TEXTENCODING_MS_1361; break; /* JOHAB_CHARSET */
179 0 : case 134: eTextEncoding = RTL_TEXTENCODING_MS_936; break; /* GB2312_CHARSET */
180 0 : case 136: eTextEncoding = RTL_TEXTENCODING_MS_950; break; /* CHINESEBIG5_CHARSET */
181 0 : case 161: eTextEncoding = RTL_TEXTENCODING_MS_1253; break; /* GREEK_CHARSET */
182 0 : case 162: eTextEncoding = RTL_TEXTENCODING_MS_1254; break; /* TURKISH_CHARSET */
183 0 : case 163: eTextEncoding = RTL_TEXTENCODING_MS_1258; break; /* VIETNAMESE_CHARSET !!! */
184 0 : case 177: eTextEncoding = RTL_TEXTENCODING_MS_1255; break; /* HEBREW_CHARSET */
185 0 : case 178: eTextEncoding = RTL_TEXTENCODING_MS_1256; break; /* ARABIC_CHARSET */
186 0 : case 186: eTextEncoding = RTL_TEXTENCODING_MS_1257; break; /* BALTIC_CHARSET */
187 0 : case 204: eTextEncoding = RTL_TEXTENCODING_MS_1251; break; /* RUSSIAN_CHARSET */
188 0 : case 222: eTextEncoding = RTL_TEXTENCODING_MS_874; break; /* THAI_CHARSET */
189 0 : case 238: eTextEncoding = RTL_TEXTENCODING_MS_1250; break; /* EASTEUROPE_CHARSET */
190 0 : case 255: eTextEncoding = RTL_TEXTENCODING_IBM_850; break; /* OEM_CHARSET */
191 0 : default: eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
192 : };
193 :
194 0 : return eTextEncoding;
195 : }
196 :
197 : /* ----------------------------------------------------------------------- */
198 :
199 0 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
200 : {
201 : /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
202 : * (Registry and Encoding) Names").
203 : */
204 :
205 : /* All Identifiers in the tables are lower case The function search */
206 : /* for the first matching string in the tables. */
207 : /* Sort order: unique (first 14, than 1), important */
208 :
209 : static ImplStrCharsetDef const aUnixCharsetISOTab[] =
210 : {
211 : { "15", RTL_TEXTENCODING_ISO_8859_15 },
212 : { "14", RTL_TEXTENCODING_ISO_8859_14 },
213 : { "13", RTL_TEXTENCODING_ISO_8859_13 },
214 : { "11", RTL_TEXTENCODING_TIS_620 },
215 : { "10", RTL_TEXTENCODING_ISO_8859_10 },
216 : { "1", RTL_TEXTENCODING_ISO_8859_1 },
217 : { "2", RTL_TEXTENCODING_ISO_8859_2 },
218 : { "3", RTL_TEXTENCODING_ISO_8859_3 },
219 : { "4", RTL_TEXTENCODING_ISO_8859_4 },
220 : { "5", RTL_TEXTENCODING_ISO_8859_5 },
221 : { "6", RTL_TEXTENCODING_ISO_8859_6 },
222 : { "7", RTL_TEXTENCODING_ISO_8859_7 },
223 : { "8", RTL_TEXTENCODING_ISO_8859_8 },
224 : { "9", RTL_TEXTENCODING_ISO_8859_9 },
225 : { NULL, RTL_TEXTENCODING_DONTKNOW }
226 : };
227 :
228 : static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
229 : {
230 : { "fontspecific", RTL_TEXTENCODING_SYMBOL },
231 : { NULL, RTL_TEXTENCODING_DONTKNOW }
232 : };
233 :
234 : static ImplStrCharsetDef const aUnixCharsetMSTab[] =
235 : {
236 : { "1252", RTL_TEXTENCODING_MS_1252 },
237 : { "1250", RTL_TEXTENCODING_MS_1250 },
238 : { "1251", RTL_TEXTENCODING_MS_1251 },
239 : { "1253", RTL_TEXTENCODING_MS_1253 },
240 : { "1254", RTL_TEXTENCODING_MS_1254 },
241 : { "1255", RTL_TEXTENCODING_MS_1255 },
242 : { "1256", RTL_TEXTENCODING_MS_1256 },
243 : { "1257", RTL_TEXTENCODING_MS_1257 },
244 : { "1258", RTL_TEXTENCODING_MS_1258 },
245 : { "932", RTL_TEXTENCODING_MS_932 },
246 : { "936", RTL_TEXTENCODING_MS_936 },
247 : { "949", RTL_TEXTENCODING_MS_949 },
248 : { "950", RTL_TEXTENCODING_MS_950 },
249 : { "1361", RTL_TEXTENCODING_MS_1361 },
250 : { "cp1252", RTL_TEXTENCODING_MS_1252 },
251 : { "cp1250", RTL_TEXTENCODING_MS_1250 },
252 : { "cp1251", RTL_TEXTENCODING_MS_1251 },
253 : { "cp1253", RTL_TEXTENCODING_MS_1253 },
254 : { "cp1254", RTL_TEXTENCODING_MS_1254 },
255 : { "cp1255", RTL_TEXTENCODING_MS_1255 },
256 : { "cp1256", RTL_TEXTENCODING_MS_1256 },
257 : { "cp1257", RTL_TEXTENCODING_MS_1257 },
258 : { "cp1258", RTL_TEXTENCODING_MS_1258 },
259 : { "cp932", RTL_TEXTENCODING_MS_932 },
260 : { "cp936", RTL_TEXTENCODING_MS_936 },
261 : { "cp949", RTL_TEXTENCODING_MS_949 },
262 : { "cp950", RTL_TEXTENCODING_MS_950 },
263 : { "cp1361", RTL_TEXTENCODING_MS_1361 },
264 : { NULL, RTL_TEXTENCODING_DONTKNOW }
265 : };
266 :
267 : static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
268 : {
269 : { "437", RTL_TEXTENCODING_IBM_437 },
270 : { "850", RTL_TEXTENCODING_IBM_850 },
271 : { "860", RTL_TEXTENCODING_IBM_860 },
272 : { "861", RTL_TEXTENCODING_IBM_861 },
273 : { "863", RTL_TEXTENCODING_IBM_863 },
274 : { "865", RTL_TEXTENCODING_IBM_865 },
275 : { "737", RTL_TEXTENCODING_IBM_737 },
276 : { "775", RTL_TEXTENCODING_IBM_775 },
277 : { "852", RTL_TEXTENCODING_IBM_852 },
278 : { "855", RTL_TEXTENCODING_IBM_855 },
279 : { "857", RTL_TEXTENCODING_IBM_857 },
280 : { "862", RTL_TEXTENCODING_IBM_862 },
281 : { "864", RTL_TEXTENCODING_IBM_864 },
282 : { "866", RTL_TEXTENCODING_IBM_866 },
283 : { "869", RTL_TEXTENCODING_IBM_869 },
284 : { "874", RTL_TEXTENCODING_MS_874 },
285 : { "1004", RTL_TEXTENCODING_MS_1252 },
286 : { "65400", RTL_TEXTENCODING_SYMBOL },
287 : { NULL, RTL_TEXTENCODING_DONTKNOW }
288 : };
289 :
290 : static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
291 : {
292 : { "r", RTL_TEXTENCODING_KOI8_R },
293 : { "u", RTL_TEXTENCODING_KOI8_U },
294 : { NULL, RTL_TEXTENCODING_DONTKNOW }
295 : };
296 :
297 : static ImplStrCharsetDef const aUnixCharsetJISX0208Tab[] =
298 : {
299 : { NULL, RTL_TEXTENCODING_JIS_X_0208 }
300 : };
301 :
302 : static ImplStrCharsetDef const aUnixCharsetJISX0201Tab[] =
303 : {
304 : { NULL, RTL_TEXTENCODING_JIS_X_0201 }
305 : };
306 :
307 : static ImplStrCharsetDef const aUnixCharsetJISX0212Tab[] =
308 : {
309 : { NULL, RTL_TEXTENCODING_JIS_X_0212 }
310 : };
311 :
312 : static ImplStrCharsetDef const aUnixCharsetGBTab[] =
313 : {
314 : { NULL, RTL_TEXTENCODING_GB_2312 }
315 : };
316 :
317 : static ImplStrCharsetDef const aUnixCharsetGBKTab[] =
318 : {
319 : { NULL, RTL_TEXTENCODING_GBK }
320 : };
321 :
322 : static ImplStrCharsetDef const aUnixCharsetBIG5Tab[] =
323 : {
324 : { NULL, RTL_TEXTENCODING_BIG5 }
325 : };
326 :
327 : static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
328 : {
329 : { NULL, RTL_TEXTENCODING_EUC_KR }
330 : };
331 :
332 : static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
333 : {
334 : { NULL, RTL_TEXTENCODING_MS_1361 }
335 : };
336 :
337 : static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
338 : {
339 : { NULL, RTL_TEXTENCODING_UNICODE }
340 : };
341 :
342 : static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
343 : {
344 : /* Currently every Unicode Encoding is for us Unicode */
345 : /* { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
346 : { NULL, RTL_TEXTENCODING_UNICODE }
347 : };
348 :
349 : static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
350 : {
351 : { NULL, RTL_TEXTENCODING_SYMBOL }
352 : };
353 :
354 : /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
355 : rev=1.1.1.1>: */
356 : static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
357 : {
358 : { "0", RTL_TEXTENCODING_TIS_620 },
359 : { "2529", RTL_TEXTENCODING_TIS_620 },
360 : { "2533", RTL_TEXTENCODING_TIS_620 },
361 : { NULL, RTL_TEXTENCODING_DONTKNOW }
362 : };
363 : static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
364 : {
365 : { "1", RTL_TEXTENCODING_TIS_620 },
366 : { NULL, RTL_TEXTENCODING_DONTKNOW }
367 : };
368 : static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
369 : {
370 : { "0", RTL_TEXTENCODING_TIS_620 },
371 : { "1", RTL_TEXTENCODING_TIS_620 },
372 : { NULL, RTL_TEXTENCODING_DONTKNOW }
373 : };
374 :
375 : static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
376 : {
377 : { "iso8859", aUnixCharsetISOTab },
378 : { "adobe", aUnixCharsetADOBETab },
379 : { "ansi", aUnixCharsetMSTab },
380 : { "microsoft", aUnixCharsetMSTab },
381 : { "ibm", aUnixCharsetIBMTab },
382 : { "koi8", aUnixCharsetKOI8Tab },
383 : { "jisx0208", aUnixCharsetJISX0208Tab },
384 : { "jisx0208.1983", aUnixCharsetJISX0208Tab },
385 : { "jisx0201", aUnixCharsetJISX0201Tab },
386 : { "jisx0201.1976", aUnixCharsetJISX0201Tab },
387 : { "jisx0212", aUnixCharsetJISX0212Tab },
388 : { "jisx0212.1990", aUnixCharsetJISX0212Tab },
389 : { "gb2312", aUnixCharsetGBTab },
390 : { "gbk", aUnixCharsetGBKTab },
391 : { "big5", aUnixCharsetBIG5Tab },
392 : { "iso10646", aUnixCharsetISO10646Tab },
393 : /* { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
394 : { "sunolcursor", aUnixCharsetSymbolTab },
395 : { "sunolglyph", aUnixCharsetSymbolTab },
396 : { "iso10646", aUnixCharsetUNICODETab },
397 : { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
398 : { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
399 : { "tis620.2529", aUnixCharsetTIS6202529Tab },
400 : { "tis620.2533", aUnixCharsetTIS6202533Tab },
401 : { "tis620", aUnixCharsetTIS620Tab },
402 : /* { "sunudcja.1997", }, */
403 : /* { "sunudcko.1997", }, */
404 : /* { "sunudczh.1997", }, */
405 : /* { "sunudczhtw.1997", }, */
406 : { NULL, NULL }
407 : };
408 :
409 0 : rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
410 : char* pBuf;
411 : char* pTempBuf;
412 0 : sal_uInt32 nBufLen = strlen( pUnixCharset )+1;
413 : const char* pFirstPart;
414 : const char* pSecondPart;
415 :
416 : /* Alloc Buffer and map to lower case */
417 0 : pBuf = new char[nBufLen];
418 0 : Impl_toAsciiLower( pUnixCharset, pBuf );
419 :
420 : /* Search FirstPart */
421 0 : pFirstPart = pBuf;
422 0 : pSecondPart = NULL;
423 0 : pTempBuf = pBuf;
424 0 : while ( *pTempBuf )
425 : {
426 0 : if ( *pTempBuf == '-' )
427 : {
428 0 : *pTempBuf = '\0';
429 0 : pSecondPart = pTempBuf+1;
430 0 : break;
431 : }
432 :
433 0 : pTempBuf++;
434 : }
435 :
436 : /* Parttrenner gefunden */
437 0 : if ( pSecondPart )
438 : {
439 : /* Search for the part tab */
440 0 : const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
441 0 : while ( pFirstPartData->mpCharsetStr )
442 : {
443 0 : if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
444 : {
445 : /* Search for the charset in the second part tab */
446 0 : const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
447 0 : while ( pData->mpCharsetStr )
448 : {
449 0 : if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
450 : {
451 0 : break;
452 : }
453 :
454 0 : pData++;
455 : }
456 :
457 : /* use default encoding for first part */
458 0 : eEncoding = pData->meTextEncoding;
459 0 : break;
460 : }
461 :
462 0 : pFirstPartData++;
463 : }
464 : }
465 :
466 0 : delete[] pBuf;
467 :
468 0 : return eEncoding;
469 : }
470 :
471 : /* ----------------------------------------------------------------------- */
472 :
473 0 : rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
474 : {
475 : /* All Identifiers are in lower case and contain only alphanumeric */
476 : /* characters. The function search for the first equal string in */
477 : /* the table. In this table are only the most used mime types. */
478 : /* Sort order: important */
479 : static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
480 : {
481 : { "usascii", RTL_TEXTENCODING_ASCII_US },
482 : { "utf8", RTL_TEXTENCODING_UTF8 },
483 : { "utf7", RTL_TEXTENCODING_UTF7 },
484 : { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
485 : { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
486 : { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
487 : { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
488 : { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
489 : { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
490 : { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
491 : { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
492 : { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
493 : { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
494 : { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
495 : { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
496 : { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
497 : { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
498 : { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
499 : { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
500 : { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
501 : { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
502 : { "eucjp", RTL_TEXTENCODING_EUC_JP },
503 : { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
504 : { "mskanji", RTL_TEXTENCODING_MS_932 },
505 : { "gb2312", RTL_TEXTENCODING_GB_2312 },
506 : { "cngb", RTL_TEXTENCODING_GB_2312 },
507 : { "big5", RTL_TEXTENCODING_BIG5 },
508 : { "cnbig5", RTL_TEXTENCODING_BIG5 },
509 : { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
510 : { "euckr", RTL_TEXTENCODING_EUC_KR },
511 : { "koi8r", RTL_TEXTENCODING_KOI8_R },
512 : { "windows1252", RTL_TEXTENCODING_MS_1252 },
513 : { "windows1250", RTL_TEXTENCODING_MS_1250 },
514 : { "windows1251", RTL_TEXTENCODING_MS_1251 },
515 : { "windows1253", RTL_TEXTENCODING_MS_1253 },
516 : { "windows1254", RTL_TEXTENCODING_MS_1254 },
517 : { "windows1255", RTL_TEXTENCODING_MS_1255 },
518 : { "windows1256", RTL_TEXTENCODING_MS_1256 },
519 : { "windows1257", RTL_TEXTENCODING_MS_1257 },
520 : { "windows1258", RTL_TEXTENCODING_MS_1258 },
521 : { NULL, RTL_TEXTENCODING_DONTKNOW }
522 : };
523 :
524 : /* All Identifiers are in lower case and contain only alphanumeric */
525 : /* characters. The function search for the first matching string in */
526 : /* the table. */
527 : /* Sort order: unique (first iso885914, than iso88591), important */
528 : static ImplStrCharsetDef const aMimeCharsetTab[] =
529 : {
530 : { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
531 : { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
532 : { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
533 : { "iso88591win", RTL_TEXTENCODING_MS_1252 },
534 : { "iso88592win", RTL_TEXTENCODING_MS_1250 },
535 : { "iso88599win", RTL_TEXTENCODING_MS_1254 },
536 : { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
537 : { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
538 : { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
539 : { "iso885911", RTL_TEXTENCODING_TIS_620 },
540 : /* This is no official MIME character set name, but it might be in
541 : use in Thailand. */
542 : { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
543 : { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
544 : { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
545 : { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
546 : { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
547 : { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
548 : { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
549 : { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
550 : { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
551 : { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
552 : { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
553 : { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
554 : { "l1", RTL_TEXTENCODING_ISO_8859_1 },
555 : { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
556 : { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
557 : { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
558 : { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
559 : { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
560 : { "l2", RTL_TEXTENCODING_ISO_8859_2 },
561 : { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
562 : { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
563 : { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
564 : { "l3", RTL_TEXTENCODING_ISO_8859_3 },
565 : { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
566 : { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
567 : { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
568 : { "l4", RTL_TEXTENCODING_ISO_8859_4 },
569 : { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
570 : { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
571 : { "cyrillicasian", RTL_TEXTENCODING_PT154 },
572 : { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
573 : { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
574 : { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
575 : { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
576 : { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
577 : { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
578 : { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
579 : { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
580 : { "greek", RTL_TEXTENCODING_ISO_8859_7 },
581 : { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
582 : { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
583 : { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
584 : { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
585 : { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
586 : { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
587 : { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
588 : { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
589 : { "l5", RTL_TEXTENCODING_ISO_8859_9 },
590 : { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
591 : { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
592 : { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
593 : { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
594 : { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
595 : { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
596 : { "iso10646us", RTL_TEXTENCODING_ASCII_US },
597 : { "iso646irv", RTL_TEXTENCODING_ASCII_US },
598 : { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
599 : { "ibm437", RTL_TEXTENCODING_IBM_437 },
600 : { "cp437", RTL_TEXTENCODING_IBM_437 },
601 : { "437", RTL_TEXTENCODING_IBM_437 },
602 : { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
603 : { "ansix34", RTL_TEXTENCODING_ASCII_US },
604 : { "ibm367", RTL_TEXTENCODING_ASCII_US },
605 : { "cp367", RTL_TEXTENCODING_ASCII_US },
606 : { "csascii", RTL_TEXTENCODING_ASCII_US },
607 : { "ibm775", RTL_TEXTENCODING_IBM_775 },
608 : { "cp775", RTL_TEXTENCODING_IBM_775 },
609 : { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
610 : { "ibm850", RTL_TEXTENCODING_IBM_850 },
611 : { "cp850", RTL_TEXTENCODING_IBM_850 },
612 : { "850", RTL_TEXTENCODING_IBM_850 },
613 : { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
614 : /* { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
615 : /* { "cp851", RTL_TEXTENCODING_IBM_851 }, */
616 : /* { "851", RTL_TEXTENCODING_IBM_851 }, */
617 : /* { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
618 : { "ibm852", RTL_TEXTENCODING_IBM_852 },
619 : { "cp852", RTL_TEXTENCODING_IBM_852 },
620 : { "852", RTL_TEXTENCODING_IBM_852 },
621 : { "cspcp852", RTL_TEXTENCODING_IBM_852 },
622 : { "ibm855", RTL_TEXTENCODING_IBM_855 },
623 : { "cp855", RTL_TEXTENCODING_IBM_855 },
624 : { "855", RTL_TEXTENCODING_IBM_855 },
625 : { "csibm855", RTL_TEXTENCODING_IBM_855 },
626 : { "ibm857", RTL_TEXTENCODING_IBM_857 },
627 : { "cp857", RTL_TEXTENCODING_IBM_857 },
628 : { "857", RTL_TEXTENCODING_IBM_857 },
629 : { "csibm857", RTL_TEXTENCODING_IBM_857 },
630 : { "ibm860", RTL_TEXTENCODING_IBM_860 },
631 : { "cp860", RTL_TEXTENCODING_IBM_860 },
632 : { "860", RTL_TEXTENCODING_IBM_860 },
633 : { "csibm860", RTL_TEXTENCODING_IBM_860 },
634 : { "ibm861", RTL_TEXTENCODING_IBM_861 },
635 : { "cp861", RTL_TEXTENCODING_IBM_861 },
636 : { "861", RTL_TEXTENCODING_IBM_861 },
637 : { "csis", RTL_TEXTENCODING_IBM_861 },
638 : { "csibm861", RTL_TEXTENCODING_IBM_861 },
639 : { "ibm862", RTL_TEXTENCODING_IBM_862 },
640 : { "cp862", RTL_TEXTENCODING_IBM_862 },
641 : { "862", RTL_TEXTENCODING_IBM_862 },
642 : { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
643 : { "ibm863", RTL_TEXTENCODING_IBM_863 },
644 : { "cp863", RTL_TEXTENCODING_IBM_863 },
645 : { "863", RTL_TEXTENCODING_IBM_863 },
646 : { "csibm863", RTL_TEXTENCODING_IBM_863 },
647 : { "ibm864", RTL_TEXTENCODING_IBM_864 },
648 : { "cp864", RTL_TEXTENCODING_IBM_864 },
649 : { "864", RTL_TEXTENCODING_IBM_864 },
650 : { "csibm864", RTL_TEXTENCODING_IBM_864 },
651 : { "ibm865", RTL_TEXTENCODING_IBM_865 },
652 : { "cp865", RTL_TEXTENCODING_IBM_865 },
653 : { "865", RTL_TEXTENCODING_IBM_865 },
654 : { "csibm865", RTL_TEXTENCODING_IBM_865 },
655 : { "ibm866", RTL_TEXTENCODING_IBM_866 },
656 : { "cp866", RTL_TEXTENCODING_IBM_866 },
657 : { "866", RTL_TEXTENCODING_IBM_866 },
658 : { "csibm866", RTL_TEXTENCODING_IBM_866 },
659 : /* { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
660 : /* { "cp868", RTL_TEXTENCODING_IBM_868 }, */
661 : /* { "cpar", RTL_TEXTENCODING_IBM_868 }, */
662 : /* { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
663 : { "ibm869", RTL_TEXTENCODING_IBM_869 },
664 : { "cp869", RTL_TEXTENCODING_IBM_869 },
665 : { "869", RTL_TEXTENCODING_IBM_869 },
666 : { "cpgr", RTL_TEXTENCODING_IBM_869 },
667 : { "csibm869", RTL_TEXTENCODING_IBM_869 },
668 : { "ibm869", RTL_TEXTENCODING_IBM_869 },
669 : { "cp869", RTL_TEXTENCODING_IBM_869 },
670 : { "869", RTL_TEXTENCODING_IBM_869 },
671 : { "cpgr", RTL_TEXTENCODING_IBM_869 },
672 : { "csibm869", RTL_TEXTENCODING_IBM_869 },
673 : { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
674 : { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
675 : { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
676 : { "mskanji", RTL_TEXTENCODING_MS_932 },
677 : { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
678 : { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
679 : { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
680 : { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
681 : { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
682 : { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
683 : { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
684 : { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
685 : { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
686 : { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
687 : { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
688 : { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
689 : { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
690 : { "isoir6", RTL_TEXTENCODING_ASCII_US },
691 : { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
692 : { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
693 : { "ascii", RTL_TEXTENCODING_ASCII_US },
694 : { "us", RTL_TEXTENCODING_ASCII_US },
695 : { "gb180302000", RTL_TEXTENCODING_GB_18030 },
696 : /* This is no actual MIME character set name, it is only in here
697 : for backwards compatibility (before "GB18030" was officially
698 : registered with IANA, this code contained some guesses of what
699 : would become official names for GB18030). */
700 : { "gb18030", RTL_TEXTENCODING_GB_18030 },
701 : { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
702 : { "tis620", RTL_TEXTENCODING_TIS_620 },
703 : { "gbk", RTL_TEXTENCODING_GBK },
704 : { "cp936", RTL_TEXTENCODING_GBK },
705 : { "ms936", RTL_TEXTENCODING_GBK },
706 : { "windows936", RTL_TEXTENCODING_GBK },
707 : { "cp874", RTL_TEXTENCODING_MS_874 },
708 : /* This is no official MIME character set name, but it might be in
709 : use in Thailand. */
710 : { "ms874", RTL_TEXTENCODING_MS_874 },
711 : /* This is no official MIME character set name, but it might be in
712 : use in Thailand. */
713 : { "windows874", RTL_TEXTENCODING_MS_874 },
714 : /* This is no official MIME character set name, but it might be in
715 : use in Thailand. */
716 : { "koi8u", RTL_TEXTENCODING_KOI8_U },
717 : { "cpis", RTL_TEXTENCODING_IBM_861 },
718 : { "ksc56011987", RTL_TEXTENCODING_MS_949 },
719 : { "isoir149", RTL_TEXTENCODING_MS_949 },
720 : { "ksc56011989", RTL_TEXTENCODING_MS_949 },
721 : { "ksc5601", RTL_TEXTENCODING_MS_949 },
722 : { "korean", RTL_TEXTENCODING_MS_949 },
723 : { "csksc56011987", RTL_TEXTENCODING_MS_949 },
724 : /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
725 : this character set identifier seems to be prominently used by MS
726 : to stand for KS C 5601 plus MS-949 extensions */
727 : { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
728 : { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
729 : { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
730 : { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
731 : { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
732 : { "ptcp154", RTL_TEXTENCODING_PT154 },
733 : { "csptcp154", RTL_TEXTENCODING_PT154 },
734 : { "pt154", RTL_TEXTENCODING_PT154 },
735 : { "cp154", RTL_TEXTENCODING_PT154 },
736 : { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
737 : /* This is not an official MIME character set name, but is in use by
738 : various windows APIs. */
739 : { NULL, RTL_TEXTENCODING_DONTKNOW }
740 : };
741 :
742 0 : rtl_TextEncoding eEncoding = RTL_TEXTENCODING_DONTKNOW;
743 : char* pBuf;
744 0 : const ImplStrCharsetDef* pData = aVIPMimeCharsetTab;
745 0 : sal_uInt32 nBufLen = strlen( pMimeCharset )+1;
746 :
747 : /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
748 0 : pBuf = new char[nBufLen];
749 0 : Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf );
750 :
751 : /* Search for equal in the VIP table */
752 0 : while ( pData->mpCharsetStr )
753 : {
754 0 : if ( strcmp( pBuf, pData->mpCharsetStr ) == 0 )
755 : {
756 0 : eEncoding = pData->meTextEncoding;
757 0 : break;
758 : }
759 :
760 0 : pData++;
761 : }
762 :
763 : /* Search for matching in the mime table */
764 0 : if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
765 : {
766 0 : pData = aMimeCharsetTab;
767 0 : while ( pData->mpCharsetStr )
768 : {
769 0 : if ( Impl_matchString( pBuf, pData->mpCharsetStr ) )
770 : {
771 0 : eEncoding = pData->meTextEncoding;
772 0 : break;
773 : }
774 :
775 0 : pData++;
776 : }
777 : }
778 :
779 0 : delete[] pBuf;
780 :
781 0 : return eEncoding;
782 : }
783 :
784 : /* ======================================================================= */
785 :
786 0 : sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
787 : {
788 0 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
789 0 : if ( pData )
790 0 : return pData->mnBestWindowsCharset;
791 : else
792 0 : return 1;
793 : }
794 :
795 : /* ----------------------------------------------------------------------- */
796 :
797 0 : const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
798 : {
799 0 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
800 0 : if ( pData )
801 0 : return (char const *) pData->mpBestUnixCharset;
802 0 : else if( eTextEncoding == RTL_TEXTENCODING_UNICODE )
803 0 : return (char const *) "iso10646-1";
804 : else
805 0 : return 0;
806 : }
807 :
808 : /* ----------------------------------------------------------------------- */
809 :
810 0 : char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
811 : nEncoding)
812 : {
813 0 : ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
814 0 : return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
815 0 : p->mpBestMimeCharset : NULL;
816 : }
817 :
818 0 : const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
819 : {
820 0 : const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
821 0 : if ( pData )
822 0 : return (char const *) pData->mpBestMimeCharset;
823 : else
824 0 : return 0;
825 : }
826 :
827 : /* The following two functions are based on <http://www.sharmahd.com/tm/
828 : codepages.html>, <http://msdn.microsoft.com/workshop/author/dhtml/reference/
829 : charsets/charset4.asp>, and <http://www.iana.org/assignments/character-sets>.
830 : */
831 :
832 : rtl_TextEncoding SAL_CALL
833 0 : rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
834 : {
835 0 : switch (nCodePage)
836 : {
837 0 : case 437: return RTL_TEXTENCODING_IBM_437;
838 0 : case 708: return RTL_TEXTENCODING_ISO_8859_6;
839 0 : case 737: return RTL_TEXTENCODING_IBM_737;
840 0 : case 775: return RTL_TEXTENCODING_IBM_775;
841 0 : case 850: return RTL_TEXTENCODING_IBM_850;
842 0 : case 852: return RTL_TEXTENCODING_IBM_852;
843 0 : case 855: return RTL_TEXTENCODING_IBM_855;
844 0 : case 857: return RTL_TEXTENCODING_IBM_857;
845 0 : case 860: return RTL_TEXTENCODING_IBM_860;
846 0 : case 861: return RTL_TEXTENCODING_IBM_861;
847 0 : case 862: return RTL_TEXTENCODING_IBM_862;
848 0 : case 863: return RTL_TEXTENCODING_IBM_863;
849 0 : case 864: return RTL_TEXTENCODING_IBM_864;
850 0 : case 865: return RTL_TEXTENCODING_IBM_865;
851 0 : case 866: return RTL_TEXTENCODING_IBM_866;
852 0 : case 869: return RTL_TEXTENCODING_IBM_869;
853 0 : case 874: return RTL_TEXTENCODING_MS_874;
854 0 : case 932: return RTL_TEXTENCODING_MS_932;
855 0 : case 936: return RTL_TEXTENCODING_MS_936;
856 0 : case 949: return RTL_TEXTENCODING_MS_949;
857 0 : case 950: return RTL_TEXTENCODING_MS_950;
858 0 : case 1250: return RTL_TEXTENCODING_MS_1250;
859 0 : case 1251: return RTL_TEXTENCODING_MS_1251;
860 0 : case 1252: return RTL_TEXTENCODING_MS_1252;
861 0 : case 1253: return RTL_TEXTENCODING_MS_1253;
862 0 : case 1254: return RTL_TEXTENCODING_MS_1254;
863 0 : case 1255: return RTL_TEXTENCODING_MS_1255;
864 0 : case 1256: return RTL_TEXTENCODING_MS_1256;
865 0 : case 1257: return RTL_TEXTENCODING_MS_1257;
866 0 : case 1258: return RTL_TEXTENCODING_MS_1258;
867 0 : case 1361: return RTL_TEXTENCODING_MS_1361;
868 0 : case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
869 0 : case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
870 0 : case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
871 0 : case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
872 0 : case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
873 0 : case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
874 0 : case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
875 0 : case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
876 0 : case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
877 0 : case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
878 0 : case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
879 0 : case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
880 0 : case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
881 0 : case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
882 0 : case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
883 0 : case 20127: return RTL_TEXTENCODING_ASCII_US;
884 0 : case 20866: return RTL_TEXTENCODING_KOI8_R;
885 0 : case 21866: return RTL_TEXTENCODING_KOI8_U;
886 0 : case 28591: return RTL_TEXTENCODING_ISO_8859_1;
887 0 : case 28592: return RTL_TEXTENCODING_ISO_8859_2;
888 0 : case 28593: return RTL_TEXTENCODING_ISO_8859_3;
889 0 : case 28594: return RTL_TEXTENCODING_ISO_8859_4;
890 0 : case 28595: return RTL_TEXTENCODING_ISO_8859_5;
891 0 : case 28596: return RTL_TEXTENCODING_ISO_8859_6;
892 0 : case 28597: return RTL_TEXTENCODING_ISO_8859_7;
893 0 : case 28598: return RTL_TEXTENCODING_ISO_8859_8;
894 0 : case 28599: return RTL_TEXTENCODING_ISO_8859_9;
895 0 : case 28605: return RTL_TEXTENCODING_ISO_8859_15;
896 0 : case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
897 0 : case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
898 0 : case 51932: return RTL_TEXTENCODING_EUC_JP;
899 0 : case 51936: return RTL_TEXTENCODING_EUC_CN;
900 0 : case 51949: return RTL_TEXTENCODING_EUC_KR;
901 0 : case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
902 0 : case 65000: return RTL_TEXTENCODING_UTF7;
903 0 : case 65001: return RTL_TEXTENCODING_UTF8;
904 0 : default: return RTL_TEXTENCODING_DONTKNOW;
905 : }
906 : }
907 :
908 : sal_uInt32 SAL_CALL
909 0 : rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
910 : {
911 0 : switch (nEncoding)
912 : {
913 0 : case RTL_TEXTENCODING_IBM_437: return 437;
914 : /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
915 0 : case RTL_TEXTENCODING_IBM_737: return 737;
916 0 : case RTL_TEXTENCODING_IBM_775: return 775;
917 0 : case RTL_TEXTENCODING_IBM_850: return 850;
918 0 : case RTL_TEXTENCODING_IBM_852: return 852;
919 0 : case RTL_TEXTENCODING_IBM_855: return 855;
920 0 : case RTL_TEXTENCODING_IBM_857: return 857;
921 0 : case RTL_TEXTENCODING_IBM_860: return 860;
922 0 : case RTL_TEXTENCODING_IBM_861: return 861;
923 0 : case RTL_TEXTENCODING_IBM_862: return 862;
924 0 : case RTL_TEXTENCODING_IBM_863: return 863;
925 0 : case RTL_TEXTENCODING_IBM_864: return 864;
926 0 : case RTL_TEXTENCODING_IBM_865: return 865;
927 0 : case RTL_TEXTENCODING_IBM_866: return 866;
928 0 : case RTL_TEXTENCODING_IBM_869: return 869;
929 0 : case RTL_TEXTENCODING_MS_874: return 874;
930 0 : case RTL_TEXTENCODING_MS_932: return 932;
931 0 : case RTL_TEXTENCODING_MS_936: return 936;
932 0 : case RTL_TEXTENCODING_MS_949: return 949;
933 0 : case RTL_TEXTENCODING_MS_950: return 950;
934 0 : case RTL_TEXTENCODING_MS_1250: return 1250;
935 0 : case RTL_TEXTENCODING_MS_1251: return 1251;
936 0 : case RTL_TEXTENCODING_MS_1252: return 1252;
937 0 : case RTL_TEXTENCODING_MS_1253: return 1253;
938 0 : case RTL_TEXTENCODING_MS_1254: return 1254;
939 0 : case RTL_TEXTENCODING_MS_1255: return 1255;
940 0 : case RTL_TEXTENCODING_MS_1256: return 1256;
941 0 : case RTL_TEXTENCODING_MS_1257: return 1257;
942 0 : case RTL_TEXTENCODING_MS_1258: return 1258;
943 0 : case RTL_TEXTENCODING_MS_1361: return 1361;
944 0 : case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
945 0 : case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
946 0 : case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
947 0 : case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
948 0 : case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
949 0 : case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
950 0 : case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
951 0 : case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
952 0 : case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
953 0 : case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
954 0 : case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
955 0 : case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
956 0 : case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
957 0 : case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
958 0 : case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
959 0 : case RTL_TEXTENCODING_ASCII_US: return 20127;
960 0 : case RTL_TEXTENCODING_KOI8_R: return 20866;
961 0 : case RTL_TEXTENCODING_KOI8_U: return 21866;
962 0 : case RTL_TEXTENCODING_ISO_8859_1: return 28591;
963 0 : case RTL_TEXTENCODING_ISO_8859_2: return 28592;
964 0 : case RTL_TEXTENCODING_ISO_8859_3: return 28593;
965 0 : case RTL_TEXTENCODING_ISO_8859_4: return 28594;
966 0 : case RTL_TEXTENCODING_ISO_8859_5: return 28595;
967 0 : case RTL_TEXTENCODING_ISO_8859_6: return 28596;
968 0 : case RTL_TEXTENCODING_ISO_8859_7: return 28597;
969 0 : case RTL_TEXTENCODING_ISO_8859_8: return 28598;
970 0 : case RTL_TEXTENCODING_ISO_8859_9: return 28599;
971 0 : case RTL_TEXTENCODING_ISO_8859_15: return 28605;
972 0 : case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
973 0 : case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
974 0 : case RTL_TEXTENCODING_EUC_JP: return 51932;
975 0 : case RTL_TEXTENCODING_EUC_CN: return 51936;
976 0 : case RTL_TEXTENCODING_EUC_KR: return 51949;
977 0 : case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
978 0 : case RTL_TEXTENCODING_UTF7: return 65000;
979 0 : case RTL_TEXTENCODING_UTF8: return 65001;
980 0 : default: return 0;
981 : }
982 : }
983 :
984 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|