Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include "rtl/textcvt.h"
23 :
24 : #include "handleundefinedunicodetotextchar.hxx"
25 : #include "tenchelp.hxx"
26 : #include "unichars.hxx"
27 :
28 : /* ======================================================================= */
29 :
30 : /* The DBCS to Unicode conversion routines use a lead table for the first */
31 : /* byte, which yields either the trail table or, for single-byte chars, */
32 : /* the Unicode value. There is a separate table for every lead byte, */
33 : /* because many tables can then be shared between charset encodings. */
34 :
35 : /* ======================================================================= */
36 :
37 3418 : sal_Size ImplDBCSToUnicode( const void* pData, SAL_UNUSED_PARAMETER void*,
38 : const char* pSrcBuf, sal_Size nSrcBytes,
39 : sal_Unicode* pDestBuf, sal_Size nDestChars,
40 : sal_uInt32 nFlags, sal_uInt32* pInfo,
41 : sal_Size* pSrcCvtBytes )
42 : {
43 : unsigned char cLead;
44 : unsigned char cTrail;
45 : sal_Unicode cConv;
46 : const ImplDBCSToUniLeadTab* pLeadEntry;
47 3418 : const ImplDBCSConvertData* pConvertData = static_cast<const ImplDBCSConvertData*>(pData);
48 3418 : const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
49 : sal_Unicode* pEndDestBuf;
50 : const char* pEndSrcBuf;
51 :
52 3418 : *pInfo = 0;
53 3418 : pEndDestBuf = pDestBuf+nDestChars;
54 3418 : pEndSrcBuf = pSrcBuf+nSrcBytes;
55 21859 : while ( pSrcBuf < pEndSrcBuf )
56 : {
57 15371 : cLead = (unsigned char)*pSrcBuf;
58 :
59 : /* get entry for the lead byte */
60 15371 : pLeadEntry = pLeadTab+cLead;
61 :
62 : /* SingleByte char? */
63 15371 : if (pLeadEntry->mpToUniTrailTab == NULL
64 2203 : || cLead < pConvertData->mnLeadStart
65 2203 : || cLead > pConvertData->mnLeadEnd)
66 : {
67 13168 : cConv = pLeadEntry->mnUniChar;
68 26336 : if ( !cConv && (cLead != 0) )
69 : {
70 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
71 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
72 : {
73 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
74 0 : break;
75 : }
76 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
77 : {
78 0 : pSrcBuf++;
79 0 : continue;
80 : }
81 : else
82 0 : cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
83 : }
84 : }
85 : else
86 : {
87 : /* Source buffer to small */
88 2203 : if ( pSrcBuf +1 == pEndSrcBuf )
89 : {
90 351 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_FLUSH) == 0 )
91 : {
92 348 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
93 348 : break;
94 : }
95 3 : cConv = 0;
96 : }
97 : else
98 : {
99 1852 : pSrcBuf++;
100 1852 : cTrail = (unsigned char)*pSrcBuf;
101 1852 : if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
102 1744 : cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
103 : else
104 108 : cConv = 0;
105 :
106 1852 : if ( !cConv )
107 : {
108 : /* EUDC Ranges */
109 : sal_uInt16 i;
110 108 : const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
111 272 : for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
112 : {
113 496 : if ( (cLead >= pEUDCTab->mnLeadStart) &&
114 224 : (cLead <= pEUDCTab->mnLeadEnd) )
115 : {
116 216 : if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
117 108 : (cTrail <= pEUDCTab->mnTrail1End) )
118 : {
119 : cConv = pEUDCTab->mnUniStart+
120 78 : ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
121 156 : (cTrail-pEUDCTab->mnTrail1Start);
122 78 : break;
123 : }
124 : else
125 : {
126 30 : sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
127 60 : if ( (pEUDCTab->mnTrailCount >= 2) &&
128 60 : (cTrail >= pEUDCTab->mnTrail2Start) &&
129 30 : (cTrail <= pEUDCTab->mnTrail2End) )
130 : {
131 : cConv = pEUDCTab->mnUniStart+
132 30 : ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
133 : nTrailCount+
134 60 : (cTrail-pEUDCTab->mnTrail2Start);
135 30 : break;
136 : }
137 : else
138 : {
139 0 : nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1;
140 0 : if ( (pEUDCTab->mnTrailCount >= 3) &&
141 0 : (cTrail >= pEUDCTab->mnTrail3Start) &&
142 0 : (cTrail <= pEUDCTab->mnTrail3End) )
143 : {
144 : cConv = pEUDCTab->mnUniStart+
145 0 : ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
146 : nTrailCount+
147 0 : (cTrail-pEUDCTab->mnTrail3Start);
148 0 : break;
149 : }
150 : }
151 : }
152 : }
153 :
154 164 : pEUDCTab++;
155 : }
156 :
157 108 : if ( !cConv )
158 : {
159 : /* We compare the full range of the trail we defined, */
160 : /* which can often be greater than the limit. We do this */
161 : /* so that extensions that don't consider encodings */
162 : /* correctly treat double-byte characters as a single */
163 : /* character as much as possible. */
164 :
165 0 : if (cLead < pConvertData->mnLeadStart
166 0 : || cLead > pConvertData->mnLeadEnd
167 0 : || cTrail < pConvertData->mnTrailStart
168 0 : || cTrail > pConvertData->mnTrailEnd)
169 : {
170 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
171 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
172 : {
173 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
174 0 : break;
175 : }
176 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
177 : {
178 0 : pSrcBuf++;
179 0 : continue;
180 : }
181 : else
182 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
183 : }
184 : }
185 : }
186 : }
187 1855 : if ( !cConv )
188 : {
189 3 : *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
190 3 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
191 : {
192 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
193 0 : break;
194 : }
195 3 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
196 : {
197 0 : pSrcBuf++;
198 0 : continue;
199 : }
200 : else
201 3 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
202 : }
203 : }
204 :
205 15023 : if ( pDestBuf == pEndDestBuf )
206 : {
207 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
208 0 : break;
209 : }
210 :
211 15023 : *pDestBuf = cConv;
212 15023 : pDestBuf++;
213 15023 : pSrcBuf++;
214 : }
215 :
216 3418 : *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
217 3418 : return (nDestChars - (pEndDestBuf-pDestBuf));
218 : }
219 :
220 : /* ----------------------------------------------------------------------- */
221 :
222 183770 : sal_Size ImplUnicodeToDBCS( const void* pData, SAL_UNUSED_PARAMETER void*,
223 : const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
224 : char* pDestBuf, sal_Size nDestBytes,
225 : sal_uInt32 nFlags, sal_uInt32* pInfo,
226 : sal_Size* pSrcCvtChars )
227 : {
228 : sal_uInt16 cConv;
229 : sal_Unicode c;
230 : const ImplUniToDBCSHighTab* pHighEntry;
231 183770 : const ImplDBCSConvertData* pConvertData = static_cast<const ImplDBCSConvertData*>(pData);
232 183770 : const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
233 : char* pEndDestBuf;
234 : const sal_Unicode* pEndSrcBuf;
235 :
236 : bool bCheckRange =
237 183770 : pConvertData->mnLeadStart != 0 || pConvertData->mnLeadEnd != 0xFF;
238 : /* this statement has the effect that this extra check is only done for
239 : EUC-KR, which uses the MS-949 tables, but does not support the full
240 : range of MS-949 */
241 :
242 183770 : *pInfo = 0;
243 183770 : pEndDestBuf = pDestBuf+nDestBytes;
244 183770 : pEndSrcBuf = pSrcBuf+nSrcChars;
245 425890 : while ( pSrcBuf < pEndSrcBuf )
246 : {
247 184932 : c = *pSrcBuf;
248 184932 : unsigned char nHighChar = (unsigned char)((c >> 8) & 0xFF);
249 184932 : unsigned char nLowChar = (unsigned char)(c & 0xFF);
250 :
251 : /* get entry for the high byte */
252 184932 : pHighEntry = pHighTab+nHighChar;
253 :
254 : /* is low byte in the table range */
255 184932 : if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
256 : {
257 53519 : cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
258 107038 : if (bCheckRange && cConv > 0x7F
259 46 : && ((cConv >> 8) < pConvertData->mnLeadStart
260 36 : || (cConv >> 8) > pConvertData->mnLeadEnd
261 36 : || (cConv & 0xFF) < pConvertData->mnTrailStart
262 36 : || (cConv & 0xFF) > pConvertData->mnTrailEnd))
263 10 : cConv = 0;
264 : }
265 : else
266 131413 : cConv = 0;
267 :
268 184932 : if (cConv == 0 && c != 0)
269 : {
270 : /* Map to EUDC ranges: */
271 136369 : ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
272 : sal_uInt32 i;
273 449679 : for (i = 0; i < pConvertData->mnEUDCCount; ++i)
274 : {
275 323086 : if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
276 : {
277 9776 : sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
278 : sal_uInt32 nLeadOff
279 9776 : = nIndex / pEUDCTab->mnTrailRangeCount;
280 : sal_uInt32 nTrailOff
281 9776 : = nIndex % pEUDCTab->mnTrailRangeCount;
282 : sal_uInt32 nSize;
283 : cConv = (sal_uInt16)
284 9776 : ((pEUDCTab->mnLeadStart + nLeadOff) << 8);
285 : nSize
286 9776 : = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
287 9776 : if (nTrailOff < nSize)
288 : {
289 4830 : cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
290 4830 : break;
291 : }
292 4946 : nTrailOff -= nSize;
293 : nSize
294 4946 : = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
295 4946 : if (nTrailOff < nSize)
296 : {
297 4932 : cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
298 4932 : break;
299 : }
300 14 : nTrailOff -= nSize;
301 14 : cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
302 14 : break;
303 : }
304 313310 : pEUDCTab++;
305 : }
306 :
307 : /* FIXME
308 : * SB: Not sure why this is in here. Plus, it does not work as
309 : * intended when (c & 0xFF) == 0, because the next !cConv check
310 : * will then think c has not yet been converted...
311 : */
312 136369 : if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
313 9243 : && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
314 : {
315 256 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
316 0 : cConv = static_cast< char >(static_cast< unsigned char >(c & 0xFF));
317 : }
318 : }
319 :
320 184932 : if ( !cConv )
321 : {
322 126598 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
323 : {
324 : /* !!! */
325 : }
326 :
327 126598 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
328 : {
329 : /* !!! */
330 : }
331 :
332 : /* Handle undefined and surrogates characters */
333 : /* (all surrogates characters are undefined) */
334 126598 : if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
335 : &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf, nFlags,
336 : pInfo))
337 16 : continue;
338 : else
339 126582 : break;
340 : }
341 :
342 : /* SingleByte */
343 58334 : if ( !(cConv & 0xFF00) )
344 : {
345 1062 : if ( pDestBuf == pEndDestBuf )
346 : {
347 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
348 0 : break;
349 : }
350 :
351 1062 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
352 1062 : pDestBuf++;
353 : }
354 : else
355 : {
356 57272 : if ( pDestBuf+1 >= pEndDestBuf )
357 : {
358 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
359 0 : break;
360 : }
361 :
362 57272 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
363 57272 : pDestBuf++;
364 57272 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
365 57272 : pDestBuf++;
366 : }
367 :
368 58334 : pSrcBuf++;
369 : }
370 :
371 183770 : *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
372 183770 : return (nDestBytes - (pEndDestBuf-pDestBuf));
373 : }
374 :
375 : /* ======================================================================= */
376 :
377 : #define JIS_EUC_LEAD_OFF 0x80
378 : #define JIS_EUC_TRAIL_OFF 0x80
379 :
380 : /* ----------------------------------------------------------------------- */
381 :
382 324 : sal_Size ImplEUCJPToUnicode( const void* pData,
383 : SAL_UNUSED_PARAMETER void*,
384 : const char* pSrcBuf, sal_Size nSrcBytes,
385 : sal_Unicode* pDestBuf, sal_Size nDestChars,
386 : sal_uInt32 nFlags, sal_uInt32* pInfo,
387 : sal_Size* pSrcCvtBytes )
388 : {
389 324 : unsigned char cLead = '\0';
390 324 : unsigned char cTrail = '\0';
391 : sal_Unicode cConv;
392 : const ImplDBCSToUniLeadTab* pLeadEntry;
393 : const ImplDBCSToUniLeadTab* pLeadTab;
394 324 : const ImplEUCJPConvertData* pConvertData = static_cast<const ImplEUCJPConvertData*>(pData);
395 : sal_Unicode* pEndDestBuf;
396 : const char* pEndSrcBuf;
397 :
398 324 : *pInfo = 0;
399 324 : pEndDestBuf = pDestBuf+nDestChars;
400 324 : pEndSrcBuf = pSrcBuf+nSrcBytes;
401 964 : while ( pSrcBuf < pEndSrcBuf )
402 : {
403 479 : unsigned char c = (unsigned char)*pSrcBuf;
404 :
405 : /* ASCII */
406 479 : if ( c <= 0x7F )
407 0 : cConv = c;
408 : else
409 : {
410 : /* SS2 - Half-width katakana */
411 : /* 8E + A1-DF */
412 479 : if ( c == 0x8E )
413 : {
414 : /* Source buffer to small */
415 1 : if ( pSrcBuf + 1 == pEndSrcBuf )
416 : {
417 1 : *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
418 1 : break;
419 : }
420 :
421 0 : pSrcBuf++;
422 0 : c = (unsigned char)*pSrcBuf;
423 0 : if ( (c >= 0xA1) && (c <= 0xDF) )
424 0 : cConv = 0xFF61+(c-0xA1);
425 : else
426 : {
427 0 : cConv = 0;
428 0 : cLead = 0x8E;
429 0 : cTrail = c;
430 : }
431 : }
432 : else
433 : {
434 : /* SS3 - JIS 0212-1990 */
435 : /* 8F + A1-FE + A1-FE */
436 478 : if ( c == 0x8F )
437 : {
438 : /* Source buffer to small */
439 2 : if (pEndSrcBuf - pSrcBuf < 3)
440 : {
441 2 : *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
442 2 : break;
443 : }
444 :
445 0 : pSrcBuf++;
446 0 : cLead = (unsigned char)*pSrcBuf;
447 0 : pSrcBuf++;
448 0 : cTrail = (unsigned char)*pSrcBuf;
449 0 : pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
450 : }
451 : /* CodeSet 2 JIS 0208-1997 */
452 : /* A1-FE + A1-FE */
453 : else
454 : {
455 : /* Source buffer to small */
456 476 : if ( pSrcBuf + 1 == pEndSrcBuf )
457 : {
458 160 : *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
459 160 : break;
460 : }
461 :
462 316 : cLead = c;
463 316 : pSrcBuf++;
464 316 : cTrail = (unsigned char)*pSrcBuf;
465 316 : pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
466 : }
467 :
468 : /* Undefined Range */
469 316 : if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
470 0 : cConv = 0;
471 : else
472 : {
473 316 : cLead -= JIS_EUC_LEAD_OFF;
474 316 : cTrail -= JIS_EUC_TRAIL_OFF;
475 316 : pLeadEntry = pLeadTab+cLead;
476 316 : if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
477 316 : cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
478 : else
479 0 : cConv = 0;
480 : }
481 : }
482 :
483 316 : if ( !cConv )
484 : {
485 : /* We compare the full range of the trail we defined, */
486 : /* which can often be greater than the limit. We do this */
487 : /* so that extensions that don't consider encodings */
488 : /* correctly treat double-byte characters as a single */
489 : /* character as much as possible. */
490 :
491 0 : if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
492 : {
493 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
494 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
495 : {
496 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
497 0 : break;
498 : }
499 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
500 : {
501 0 : pSrcBuf++;
502 0 : continue;
503 : }
504 : else
505 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
506 : }
507 : else
508 : {
509 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
510 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
511 : {
512 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
513 0 : break;
514 : }
515 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
516 : {
517 0 : pSrcBuf++;
518 0 : continue;
519 : }
520 : else
521 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
522 : }
523 : }
524 : }
525 :
526 316 : if ( pDestBuf == pEndDestBuf )
527 : {
528 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
529 0 : break;
530 : }
531 :
532 316 : *pDestBuf = cConv;
533 316 : pDestBuf++;
534 316 : pSrcBuf++;
535 : }
536 :
537 324 : *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
538 324 : return (nDestChars - (pEndDestBuf-pDestBuf));
539 : }
540 :
541 : /* ----------------------------------------------------------------------- */
542 :
543 3 : sal_Size ImplUnicodeToEUCJP( const void* pData,
544 : SAL_UNUSED_PARAMETER void*,
545 : const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
546 : char* pDestBuf, sal_Size nDestBytes,
547 : sal_uInt32 nFlags, sal_uInt32* pInfo,
548 : sal_Size* pSrcCvtChars )
549 : {
550 : sal_uInt32 cConv;
551 : sal_Unicode c;
552 : unsigned char nHighChar;
553 : unsigned char nLowChar;
554 : const ImplUniToDBCSHighTab* pHighEntry;
555 : const ImplUniToDBCSHighTab* pHighTab;
556 3 : const ImplEUCJPConvertData* pConvertData = static_cast<const ImplEUCJPConvertData*>(pData);
557 : char* pEndDestBuf;
558 : const sal_Unicode* pEndSrcBuf;
559 :
560 3 : *pInfo = 0;
561 3 : pEndDestBuf = pDestBuf+nDestBytes;
562 3 : pEndSrcBuf = pSrcBuf+nSrcChars;
563 82 : while ( pSrcBuf < pEndSrcBuf )
564 : {
565 76 : c = *pSrcBuf;
566 :
567 : /* ASCII */
568 76 : if ( c <= 0x7F )
569 0 : cConv = c;
570 : /* Half-width katakana */
571 76 : else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
572 0 : cConv = 0x8E00+0xA1+(c-0xFF61);
573 : else
574 : {
575 76 : nHighChar = (unsigned char)((c >> 8) & 0xFF);
576 76 : nLowChar = (unsigned char)(c & 0xFF);
577 :
578 : /* JIS 0208 */
579 76 : pHighTab = pConvertData->mpUniToJIS0208HighTab;
580 76 : pHighEntry = pHighTab+nHighChar;
581 76 : if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
582 : {
583 76 : cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
584 152 : if (cConv != 0)
585 75 : cConv |= 0x8080;
586 : }
587 : else
588 0 : cConv = 0;
589 :
590 : /* JIS 0212 */
591 76 : if ( !cConv )
592 : {
593 1 : pHighTab = pConvertData->mpUniToJIS0212HighTab;
594 1 : pHighEntry = pHighTab+nHighChar;
595 1 : if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
596 : {
597 0 : cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
598 0 : if (cConv != 0)
599 0 : cConv |= 0x8F8080;
600 : }
601 :
602 1 : if ( !cConv )
603 : {
604 1 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
605 : {
606 : /* !!! */
607 : }
608 :
609 1 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
610 : {
611 : /* !!! */
612 : }
613 :
614 : /* Handle undefined and surrogates characters */
615 : /* (all surrogates characters are undefined) */
616 1 : if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
617 : &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf,
618 : nFlags, pInfo))
619 1 : continue;
620 : else
621 0 : break;
622 : }
623 : }
624 : }
625 :
626 : /* SingleByte */
627 75 : if ( !(cConv & 0xFFFF00) )
628 : {
629 0 : if ( pDestBuf == pEndDestBuf )
630 : {
631 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
632 0 : break;
633 : }
634 :
635 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
636 0 : pDestBuf++;
637 : }
638 : /* DoubleByte */
639 75 : else if ( !(cConv & 0xFF0000) )
640 : {
641 75 : if ( pDestBuf+1 >= pEndDestBuf )
642 : {
643 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
644 0 : break;
645 : }
646 :
647 75 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
648 75 : pDestBuf++;
649 75 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
650 75 : pDestBuf++;
651 : }
652 : else
653 : {
654 0 : if ( pDestBuf+2 >= pEndDestBuf )
655 : {
656 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
657 0 : break;
658 : }
659 :
660 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 16) & 0xFF));
661 0 : pDestBuf++;
662 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
663 0 : pDestBuf++;
664 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
665 0 : pDestBuf++;
666 : }
667 :
668 75 : pSrcBuf++;
669 : }
670 :
671 3 : *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
672 3 : return (nDestBytes - (pEndDestBuf-pDestBuf));
673 : }
674 :
675 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|