Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include "rtl/textcvt.h"
23 :
24 : #include "handleundefinedunicodetotextchar.hxx"
25 : #include "tenchelp.hxx"
26 : #include "unichars.hxx"
27 :
28 : /* ======================================================================= */
29 :
30 : /* The DBCS to Unicode conversion routines use a lead table indexed by the */
31 : /* first byte; its entry yields either the trail table or, for single-byte */
32 : /* characters, the Unicode value. Every lead byte has a separate trail */
33 : /* table, so that many tables can be shared between charset encodings. */
34 :
35 : /* ======================================================================= */
36 :
37 0 : sal_Size ImplDBCSToUnicode( const void* pData, SAL_UNUSED_PARAMETER void*,
38 : const char* pSrcBuf, sal_Size nSrcBytes,
39 : sal_Unicode* pDestBuf, sal_Size nDestChars,
40 : sal_uInt32 nFlags, sal_uInt32* pInfo,
41 : sal_Size* pSrcCvtBytes )
42 : {
43 : unsigned char cLead;
44 : unsigned char cTrail;
45 : sal_Unicode cConv;
46 : const ImplDBCSToUniLeadTab* pLeadEntry;
47 0 : const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData;
48 0 : const ImplDBCSToUniLeadTab* pLeadTab = pConvertData->mpToUniLeadTab;
49 : sal_Unicode* pEndDestBuf;
50 : const char* pEndSrcBuf;
51 :
52 0 : *pInfo = 0;
53 0 : pEndDestBuf = pDestBuf+nDestChars;
54 0 : pEndSrcBuf = pSrcBuf+nSrcBytes;
55 0 : while ( pSrcBuf < pEndSrcBuf )
56 : {
57 0 : cLead = (unsigned char)*pSrcBuf;
58 :
59 : /* get entry for the lead byte */
60 0 : pLeadEntry = pLeadTab+cLead;
61 :
62 : /* SingleByte char? */
63 0 : if (pLeadEntry->mpToUniTrailTab == NULL
64 0 : || cLead < pConvertData->mnLeadStart
65 0 : || cLead > pConvertData->mnLeadEnd)
66 : {
67 0 : cConv = pLeadEntry->mnUniChar;
68 0 : if ( !cConv && (cLead != 0) )
69 : {
70 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_UNDEFINED;
71 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR )
72 : {
73 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
74 0 : break;
75 : }
76 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_IGNORE )
77 : {
78 0 : pSrcBuf++;
79 0 : continue;
80 : }
81 : else
82 0 : cConv = ImplGetUndefinedUnicodeChar(cLead, nFlags);
83 : }
84 : }
85 : else
86 : {
87 : /* Source buffer to small */
88 0 : if ( pSrcBuf +1 == pEndSrcBuf )
89 : {
90 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
91 0 : break;
92 : }
93 :
94 0 : pSrcBuf++;
95 0 : cTrail = (unsigned char)*pSrcBuf;
96 0 : if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
97 0 : cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
98 : else
99 0 : cConv = 0;
100 :
101 0 : if ( !cConv )
102 : {
103 : /* EUDC Ranges */
104 : sal_uInt16 i;
105 0 : const ImplDBCSEUDCData* pEUDCTab = pConvertData->mpEUDCTab;
106 0 : for ( i = 0; i < pConvertData->mnEUDCCount; i++ )
107 : {
108 0 : if ( (cLead >= pEUDCTab->mnLeadStart) &&
109 0 : (cLead <= pEUDCTab->mnLeadEnd) )
110 : {
111 0 : if ( (cTrail >= pEUDCTab->mnTrail1Start) &&
112 0 : (cTrail <= pEUDCTab->mnTrail1End) )
113 : {
114 : cConv = pEUDCTab->mnUniStart+
115 0 : ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
116 0 : (cTrail-pEUDCTab->mnTrail1Start);
117 0 : break;
118 : }
119 : else
120 : {
121 0 : sal_uInt16 nTrailCount = pEUDCTab->mnTrail1End-pEUDCTab->mnTrail1Start+1;
122 0 : if ( (pEUDCTab->mnTrailCount >= 2) &&
123 0 : (cTrail >= pEUDCTab->mnTrail2Start) &&
124 0 : (cTrail <= pEUDCTab->mnTrail2End) )
125 : {
126 : cConv = pEUDCTab->mnUniStart+
127 0 : ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
128 : nTrailCount+
129 0 : (cTrail-pEUDCTab->mnTrail2Start);
130 0 : break;
131 : }
132 : else
133 : {
134 0 : nTrailCount = pEUDCTab->mnTrail2End-pEUDCTab->mnTrail2Start+1;
135 0 : if ( (pEUDCTab->mnTrailCount >= 3) &&
136 0 : (cTrail >= pEUDCTab->mnTrail3Start) &&
137 0 : (cTrail <= pEUDCTab->mnTrail3End) )
138 : {
139 : cConv = pEUDCTab->mnUniStart+
140 0 : ((cLead-pEUDCTab->mnLeadStart)*pEUDCTab->mnTrailRangeCount)+
141 : nTrailCount+
142 0 : (cTrail-pEUDCTab->mnTrail3Start);
143 0 : break;
144 : }
145 : }
146 : }
147 : }
148 :
149 0 : pEUDCTab++;
150 : }
151 :
152 0 : if ( !cConv )
153 : {
154 : /* We compare the full range of the trail we defined, */
155 : /* which can often be greater than the limit. We do this */
156 : /* so that extensions that don't consider encodings */
157 : /* correctly treat double-byte characters as a single */
158 : /* character as much as possible. */
159 :
160 0 : if (cLead < pConvertData->mnLeadStart
161 0 : || cLead > pConvertData->mnLeadEnd
162 0 : || cTrail < pConvertData->mnTrailStart
163 0 : || cTrail > pConvertData->mnTrailEnd)
164 : {
165 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
166 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
167 : {
168 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
169 0 : break;
170 : }
171 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
172 : {
173 0 : pSrcBuf++;
174 0 : continue;
175 : }
176 : else
177 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
178 : }
179 : else
180 : {
181 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
182 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
183 : {
184 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
185 0 : break;
186 : }
187 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
188 : {
189 0 : pSrcBuf++;
190 0 : continue;
191 : }
192 : else
193 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
194 : }
195 : }
196 : }
197 : }
198 :
199 0 : if ( pDestBuf == pEndDestBuf )
200 : {
201 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
202 0 : break;
203 : }
204 :
205 0 : *pDestBuf = cConv;
206 0 : pDestBuf++;
207 0 : pSrcBuf++;
208 : }
209 :
210 0 : *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
211 0 : return (nDestChars - (pEndDestBuf-pDestBuf));
212 : }
213 :
214 : /* ----------------------------------------------------------------------- */
215 :
216 0 : sal_Size ImplUnicodeToDBCS( const void* pData, SAL_UNUSED_PARAMETER void*,
217 : const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
218 : char* pDestBuf, sal_Size nDestBytes,
219 : sal_uInt32 nFlags, sal_uInt32* pInfo,
220 : sal_Size* pSrcCvtChars )
221 : {
222 : sal_uInt16 cConv;
223 : sal_Unicode c;
224 : unsigned char nHighChar;
225 : unsigned char nLowChar;
226 : const ImplUniToDBCSHighTab* pHighEntry;
227 0 : const ImplDBCSConvertData* pConvertData = (const ImplDBCSConvertData*)pData;
228 0 : const ImplUniToDBCSHighTab* pHighTab = pConvertData->mpToDBCSHighTab;
229 : char* pEndDestBuf;
230 : const sal_Unicode* pEndSrcBuf;
231 :
232 : bool bCheckRange =
233 0 : pConvertData->mnLeadStart != 0 || pConvertData->mnLeadEnd != 0xFF;
234 : /* this statement has the effect that this extra check is only done for
235 : EUC-KR, which uses the MS-949 tables, but does not support the full
236 : range of MS-949 */
237 :
238 0 : *pInfo = 0;
239 0 : pEndDestBuf = pDestBuf+nDestBytes;
240 0 : pEndSrcBuf = pSrcBuf+nSrcChars;
241 0 : while ( pSrcBuf < pEndSrcBuf )
242 : {
243 0 : c = *pSrcBuf;
244 0 : nHighChar = (unsigned char)((c >> 8) & 0xFF);
245 0 : nLowChar = (unsigned char)(c & 0xFF);
246 :
247 : /* get entry for the high byte */
248 0 : pHighEntry = pHighTab+nHighChar;
249 :
250 : /* is low byte in the table range */
251 0 : if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
252 : {
253 0 : cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
254 0 : if (bCheckRange && cConv > 0x7F
255 0 : && ((cConv >> 8) < pConvertData->mnLeadStart
256 0 : || (cConv >> 8) > pConvertData->mnLeadEnd
257 0 : || (cConv & 0xFF) < pConvertData->mnTrailStart
258 0 : || (cConv & 0xFF) > pConvertData->mnTrailEnd))
259 0 : cConv = 0;
260 : }
261 : else
262 0 : cConv = 0;
263 :
264 0 : if (cConv == 0 && c != 0)
265 : {
266 : /* Map to EUDC ranges: */
267 0 : ImplDBCSEUDCData const * pEUDCTab = pConvertData->mpEUDCTab;
268 : sal_uInt32 i;
269 0 : for (i = 0; i < pConvertData->mnEUDCCount; ++i)
270 : {
271 0 : if (c >= pEUDCTab->mnUniStart && c <= pEUDCTab->mnUniEnd)
272 : {
273 0 : sal_uInt32 nIndex = c - pEUDCTab->mnUniStart;
274 : sal_uInt32 nLeadOff
275 0 : = nIndex / pEUDCTab->mnTrailRangeCount;
276 : sal_uInt32 nTrailOff
277 0 : = nIndex % pEUDCTab->mnTrailRangeCount;
278 : sal_uInt32 nSize;
279 : cConv = (sal_uInt16)
280 0 : ((pEUDCTab->mnLeadStart + nLeadOff) << 8);
281 : nSize
282 0 : = pEUDCTab->mnTrail1End - pEUDCTab->mnTrail1Start + 1;
283 0 : if (nTrailOff < nSize)
284 : {
285 0 : cConv |= pEUDCTab->mnTrail1Start + nTrailOff;
286 0 : break;
287 : }
288 0 : nTrailOff -= nSize;
289 : nSize
290 0 : = pEUDCTab->mnTrail2End - pEUDCTab->mnTrail2Start + 1;
291 0 : if (nTrailOff < nSize)
292 : {
293 0 : cConv |= pEUDCTab->mnTrail2Start + nTrailOff;
294 0 : break;
295 : }
296 0 : nTrailOff -= nSize;
297 0 : cConv |= pEUDCTab->mnTrail3Start + nTrailOff;
298 0 : break;
299 : }
300 0 : pEUDCTab++;
301 : }
302 :
303 : /* FIXME
304 : * SB: Not sure why this is in here. Plus, it does not work as
305 : * intended when (c & 0xFF) == 0, because the next !cConv check
306 : * will then think c has not yet been converted...
307 : */
308 0 : if (c >= RTL_TEXTCVT_BYTE_PRIVATE_START
309 0 : && c <= RTL_TEXTCVT_BYTE_PRIVATE_END)
310 : {
311 0 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
312 0 : cConv = static_cast< char >(static_cast< unsigned char >(c & 0xFF));
313 : }
314 : }
315 :
316 0 : if ( !cConv )
317 : {
318 0 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
319 : {
320 : /* !!! */
321 : }
322 :
323 0 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
324 : {
325 : /* !!! */
326 : }
327 :
328 : /* Handle undefined and surrogates characters */
329 : /* (all surrogates characters are undefined) */
330 0 : if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
331 : &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf, nFlags,
332 : pInfo))
333 0 : continue;
334 : else
335 0 : break;
336 : }
337 :
338 : /* SingleByte */
339 0 : if ( !(cConv & 0xFF00) )
340 : {
341 0 : if ( pDestBuf == pEndDestBuf )
342 : {
343 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
344 0 : break;
345 : }
346 :
347 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
348 0 : pDestBuf++;
349 : }
350 : else
351 : {
352 0 : if ( pDestBuf+1 >= pEndDestBuf )
353 : {
354 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
355 0 : break;
356 : }
357 :
358 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
359 0 : pDestBuf++;
360 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
361 0 : pDestBuf++;
362 : }
363 :
364 0 : pSrcBuf++;
365 : }
366 :
367 0 : *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
368 0 : return (nDestBytes - (pEndDestBuf-pDestBuf));
369 : }
370 :
371 : /* ======================================================================= */
372 :
373 : #define JIS_EUC_LEAD_OFF 0x80 /* smallest lead byte accepted by ImplEUCJPToUnicode; subtracted from the lead byte to index the JIS lead table */
374 : #define JIS_EUC_TRAIL_OFF 0x80 /* smallest trail byte accepted by ImplEUCJPToUnicode; subtracted from the trail byte before the trail table lookup */
375 :
376 : /* ----------------------------------------------------------------------- */
377 :
378 0 : sal_Size ImplEUCJPToUnicode( const void* pData,
379 : SAL_UNUSED_PARAMETER void*,
380 : const char* pSrcBuf, sal_Size nSrcBytes,
381 : sal_Unicode* pDestBuf, sal_Size nDestChars,
382 : sal_uInt32 nFlags, sal_uInt32* pInfo,
383 : sal_Size* pSrcCvtBytes )
384 : {
385 : unsigned char c;
386 0 : unsigned char cLead = '\0';
387 0 : unsigned char cTrail = '\0';
388 : sal_Unicode cConv;
389 : const ImplDBCSToUniLeadTab* pLeadEntry;
390 : const ImplDBCSToUniLeadTab* pLeadTab;
391 0 : const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
392 : sal_Unicode* pEndDestBuf;
393 : const char* pEndSrcBuf;
394 :
395 0 : *pInfo = 0;
396 0 : pEndDestBuf = pDestBuf+nDestChars;
397 0 : pEndSrcBuf = pSrcBuf+nSrcBytes;
398 0 : while ( pSrcBuf < pEndSrcBuf )
399 : {
400 0 : c = (unsigned char)*pSrcBuf;
401 :
402 : /* ASCII */
403 0 : if ( c <= 0x7F )
404 0 : cConv = c;
405 : else
406 : {
407 : /* SS2 - Half-width katakana */
408 : /* 8E + A1-DF */
409 0 : if ( c == 0x8E )
410 : {
411 : /* Source buffer to small */
412 0 : if ( pSrcBuf + 1 == pEndSrcBuf )
413 : {
414 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
415 0 : break;
416 : }
417 :
418 0 : pSrcBuf++;
419 0 : c = (unsigned char)*pSrcBuf;
420 0 : if ( (c >= 0xA1) && (c <= 0xDF) )
421 0 : cConv = 0xFF61+(c-0xA1);
422 : else
423 : {
424 0 : cConv = 0;
425 0 : cLead = 0x8E;
426 0 : cTrail = c;
427 : }
428 : }
429 : else
430 : {
431 : /* SS3 - JIS 0212-1990 */
432 : /* 8F + A1-FE + A1-FE */
433 0 : if ( c == 0x8F )
434 : {
435 : /* Source buffer to small */
436 0 : if (pEndSrcBuf - pSrcBuf < 3)
437 : {
438 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
439 0 : break;
440 : }
441 :
442 0 : pSrcBuf++;
443 0 : cLead = (unsigned char)*pSrcBuf;
444 0 : pSrcBuf++;
445 0 : cTrail = (unsigned char)*pSrcBuf;
446 0 : pLeadTab = pConvertData->mpJIS0212ToUniLeadTab;
447 : }
448 : /* CodeSet 2 JIS 0208-1997 */
449 : /* A1-FE + A1-FE */
450 : else
451 : {
452 : /* Source buffer to small */
453 0 : if ( pSrcBuf + 1 == pEndSrcBuf )
454 : {
455 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL;
456 0 : break;
457 : }
458 :
459 0 : cLead = c;
460 0 : pSrcBuf++;
461 0 : cTrail = (unsigned char)*pSrcBuf;
462 0 : pLeadTab = pConvertData->mpJIS0208ToUniLeadTab;
463 : }
464 :
465 : /* Undefined Range */
466 0 : if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
467 0 : cConv = 0;
468 : else
469 : {
470 0 : cLead -= JIS_EUC_LEAD_OFF;
471 0 : cTrail -= JIS_EUC_TRAIL_OFF;
472 0 : pLeadEntry = pLeadTab+cLead;
473 0 : if ( (cTrail >= pLeadEntry->mnTrailStart) && (cTrail <= pLeadEntry->mnTrailEnd) )
474 0 : cConv = pLeadEntry->mpToUniTrailTab[cTrail-pLeadEntry->mnTrailStart];
475 : else
476 0 : cConv = 0;
477 : }
478 : }
479 :
480 0 : if ( !cConv )
481 : {
482 : /* We compare the full range of the trail we defined, */
483 : /* which can often be greater than the limit. We do this */
484 : /* so that extensions that don't consider encodings */
485 : /* correctly treat double-byte characters as a single */
486 : /* character as much as possible. */
487 :
488 0 : if ( (cLead < JIS_EUC_LEAD_OFF) || (cTrail < JIS_EUC_TRAIL_OFF) )
489 : {
490 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_INVALID;
491 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR )
492 : {
493 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
494 0 : break;
495 : }
496 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_INVALID_MASK) == RTL_TEXTTOUNICODE_FLAGS_INVALID_IGNORE )
497 : {
498 0 : pSrcBuf++;
499 0 : continue;
500 : }
501 : else
502 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
503 : }
504 : else
505 : {
506 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_MBUNDEFINED;
507 0 : if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR )
508 : {
509 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR;
510 0 : break;
511 : }
512 0 : else if ( (nFlags & RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_MASK) == RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_IGNORE )
513 : {
514 0 : pSrcBuf++;
515 0 : continue;
516 : }
517 : else
518 0 : cConv = RTL_TEXTENC_UNICODE_REPLACEMENT_CHARACTER;
519 : }
520 : }
521 : }
522 :
523 0 : if ( pDestBuf == pEndDestBuf )
524 : {
525 0 : *pInfo |= RTL_TEXTTOUNICODE_INFO_ERROR | RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
526 0 : break;
527 : }
528 :
529 0 : *pDestBuf = cConv;
530 0 : pDestBuf++;
531 0 : pSrcBuf++;
532 : }
533 :
534 0 : *pSrcCvtBytes = nSrcBytes - (pEndSrcBuf-pSrcBuf);
535 0 : return (nDestChars - (pEndDestBuf-pDestBuf));
536 : }
537 :
538 : /* ----------------------------------------------------------------------- */
539 :
540 0 : sal_Size ImplUnicodeToEUCJP( const void* pData,
541 : SAL_UNUSED_PARAMETER void*,
542 : const sal_Unicode* pSrcBuf, sal_Size nSrcChars,
543 : char* pDestBuf, sal_Size nDestBytes,
544 : sal_uInt32 nFlags, sal_uInt32* pInfo,
545 : sal_Size* pSrcCvtChars )
546 : {
547 : sal_uInt32 cConv;
548 : sal_Unicode c;
549 : unsigned char nHighChar;
550 : unsigned char nLowChar;
551 : const ImplUniToDBCSHighTab* pHighEntry;
552 : const ImplUniToDBCSHighTab* pHighTab;
553 0 : const ImplEUCJPConvertData* pConvertData = (const ImplEUCJPConvertData*)pData;
554 : char* pEndDestBuf;
555 : const sal_Unicode* pEndSrcBuf;
556 :
557 0 : *pInfo = 0;
558 0 : pEndDestBuf = pDestBuf+nDestBytes;
559 0 : pEndSrcBuf = pSrcBuf+nSrcChars;
560 0 : while ( pSrcBuf < pEndSrcBuf )
561 : {
562 0 : c = *pSrcBuf;
563 :
564 : /* ASCII */
565 0 : if ( c <= 0x7F )
566 0 : cConv = c;
567 : /* Half-width katakana */
568 0 : else if ( (c >= 0xFF61) && (c <= 0xFF9F) )
569 0 : cConv = 0x8E00+0xA1+(c-0xFF61);
570 : else
571 : {
572 0 : nHighChar = (unsigned char)((c >> 8) & 0xFF);
573 0 : nLowChar = (unsigned char)(c & 0xFF);
574 :
575 : /* JIS 0208 */
576 0 : pHighTab = pConvertData->mpUniToJIS0208HighTab;
577 0 : pHighEntry = pHighTab+nHighChar;
578 0 : if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
579 : {
580 0 : cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
581 0 : if (cConv != 0)
582 0 : cConv |= 0x8080;
583 : }
584 : else
585 0 : cConv = 0;
586 :
587 : /* JIS 0212 */
588 0 : if ( !cConv )
589 : {
590 0 : pHighTab = pConvertData->mpUniToJIS0212HighTab;
591 0 : pHighEntry = pHighTab+nHighChar;
592 0 : if ( (nLowChar >= pHighEntry->mnLowStart) && (nLowChar <= pHighEntry->mnLowEnd) )
593 : {
594 0 : cConv = pHighEntry->mpToUniTrailTab[nLowChar-pHighEntry->mnLowStart];
595 0 : if (cConv != 0)
596 0 : cConv |= 0x8F8080;
597 : }
598 :
599 0 : if ( !cConv )
600 : {
601 0 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACE )
602 : {
603 : /* !!! */
604 : }
605 :
606 0 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_REPLACESTR )
607 : {
608 : /* !!! */
609 : }
610 :
611 : /* Handle undefined and surrogates characters */
612 : /* (all surrogates characters are undefined) */
613 0 : if (sal::detail::textenc::handleUndefinedUnicodeToTextChar(
614 : &pSrcBuf, pEndSrcBuf, &pDestBuf, pEndDestBuf,
615 : nFlags, pInfo))
616 0 : continue;
617 : else
618 0 : break;
619 : }
620 : }
621 : }
622 :
623 : /* SingleByte */
624 0 : if ( !(cConv & 0xFFFF00) )
625 : {
626 0 : if ( pDestBuf == pEndDestBuf )
627 : {
628 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
629 0 : break;
630 : }
631 :
632 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
633 0 : pDestBuf++;
634 : }
635 : /* DoubleByte */
636 0 : else if ( !(cConv & 0xFF0000) )
637 : {
638 0 : if ( pDestBuf+1 >= pEndDestBuf )
639 : {
640 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
641 0 : break;
642 : }
643 :
644 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
645 0 : pDestBuf++;
646 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
647 0 : pDestBuf++;
648 : }
649 : else
650 : {
651 0 : if ( pDestBuf+2 >= pEndDestBuf )
652 : {
653 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
654 0 : break;
655 : }
656 :
657 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 16) & 0xFF));
658 0 : pDestBuf++;
659 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >((cConv >> 8) & 0xFF));
660 0 : pDestBuf++;
661 0 : *pDestBuf = static_cast< char >(static_cast< unsigned char >(cConv & 0xFF));
662 0 : pDestBuf++;
663 : }
664 :
665 0 : pSrcBuf++;
666 : }
667 :
668 0 : *pSrcCvtChars = nSrcChars - (pEndSrcBuf-pSrcBuf);
669 0 : return (nDestBytes - (pEndDestBuf-pDestBuf));
670 : }
671 :
672 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|