Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include <cstddef>
23 :
24 : #include "osl/diagnose.h"
25 : #include "rtl/textcvt.h"
26 : #include "sal/types.h"
27 :
28 : #include "context.hxx"
29 : #include "converter.hxx"
30 : #include "convertsinglebytetobmpunicode.hxx"
31 : #include "unichars.hxx"
32 :
33 233 : sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
34 : void const * data, SAL_UNUSED_PARAMETER void *, sal_Char const * srcBuf,
35 : sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
36 : sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
37 : {
38 : sal_Unicode const * map = static_cast<
39 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
40 233 : data)->byteToUnicode;
41 233 : sal_uInt32 infoFlags = 0;
42 233 : sal_Size converted = 0;
43 233 : sal_Unicode * destBufPtr = destBuf;
44 233 : sal_Unicode * destBufEnd = destBuf + destChars;
45 783 : for (; converted < srcBytes; ++converted) {
46 778 : bool undefined = true;
47 778 : sal_Char b = *srcBuf++;
48 778 : sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
49 778 : if (c == 0xFFFF) {
50 228 : goto bad_input;
51 : }
52 550 : if (destBufEnd - destBufPtr < 1) {
53 0 : goto no_output;
54 : }
55 550 : *destBufPtr++ = c;
56 550 : continue;
57 : bad_input:
58 228 : switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
59 : undefined, false, b, flags, &destBufPtr, destBufEnd,
60 228 : &infoFlags))
61 : {
62 : case sal::detail::textenc::BAD_INPUT_STOP:
63 228 : break;
64 :
65 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
66 0 : continue;
67 :
68 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
69 0 : goto no_output;
70 : }
71 228 : break;
72 : no_output:
73 0 : --srcBuf;
74 0 : infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
75 0 : break;
76 : }
77 233 : if (info != 0) {
78 233 : *info = infoFlags;
79 : }
80 233 : if (srcCvtBytes != 0) {
81 233 : *srcCvtBytes = converted;
82 : }
83 233 : return destBufPtr - destBuf;
84 : }
85 :
86 7 : sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
87 : void const * data, void * context,
88 : sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
89 : sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
90 : sal_Size * srcCvtChars)
91 : {
92 : std::size_t entries = static_cast<
93 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
94 7 : data)->unicodeToByteEntries;
95 : rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
96 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
97 7 : data)->unicodeToByte;
98 7 : sal_Unicode highSurrogate = 0;
99 7 : sal_uInt32 infoFlags = 0;
100 7 : sal_Size converted = 0;
101 7 : sal_Char * destBufPtr = destBuf;
102 7 : sal_Char * destBufEnd = destBuf + destBytes;
103 7 : if (context != 0) {
104 : highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
105 7 : m_nHighSurrogate;
106 : }
107 567 : for (; converted < srcChars; ++converted) {
108 560 : bool undefined = true;
109 560 : sal_uInt32 c = *srcBuf++;
110 560 : if (highSurrogate == 0) {
111 560 : if (ImplIsHighSurrogate(c)) {
112 0 : highSurrogate = static_cast< sal_Unicode >(c);
113 0 : continue;
114 : }
115 0 : } else if (ImplIsLowSurrogate(c)) {
116 0 : c = ImplCombineSurrogates(highSurrogate, c);
117 : } else {
118 0 : undefined = false;
119 0 : goto bad_input;
120 : }
121 560 : if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
122 0 : undefined = false;
123 0 : goto bad_input;
124 : }
125 : // Linearly searching through the ranges if probably fastest, assuming
126 : // that most converted characters belong to the ASCII subset:
127 18980 : for (std::size_t i = 0; i < entries; ++i) {
128 18980 : if (c < ranges[i].unicode) {
129 0 : break;
130 18980 : } else if (c <= sal::static_int_cast< sal_uInt32 >(
131 18980 : ranges[i].unicode + ranges[i].range))
132 : {
133 560 : if (destBufEnd - destBufPtr < 1) {
134 0 : goto no_output;
135 : }
136 : *destBufPtr++ = static_cast< sal_Char >(
137 560 : ranges[i].byte + (c - ranges[i].unicode));
138 560 : goto done;
139 : }
140 : }
141 0 : goto bad_input;
142 : done:
143 560 : highSurrogate = 0;
144 560 : continue;
145 : bad_input:
146 0 : switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
147 : undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
148 0 : 0, 0))
149 : {
150 : case sal::detail::textenc::BAD_INPUT_STOP:
151 0 : highSurrogate = 0;
152 0 : break;
153 :
154 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
155 0 : highSurrogate = 0;
156 0 : continue;
157 :
158 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
159 0 : goto no_output;
160 : }
161 0 : break;
162 : no_output:
163 0 : --srcBuf;
164 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
165 0 : break;
166 : }
167 7 : if (highSurrogate != 0
168 0 : && ((infoFlags
169 0 : & (RTL_UNICODETOTEXT_INFO_ERROR
170 : | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
171 : == 0))
172 : {
173 0 : if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
174 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
175 : } else {
176 0 : switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
177 : false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
178 0 : 0, 0))
179 : {
180 : case sal::detail::textenc::BAD_INPUT_STOP:
181 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
182 0 : highSurrogate = 0;
183 0 : break;
184 :
185 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
186 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
187 0 : break;
188 : }
189 : }
190 : }
191 7 : if (context != 0) {
192 : static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
193 7 : = highSurrogate;
194 : }
195 7 : if (info != 0) {
196 7 : *info = infoFlags;
197 : }
198 7 : if (srcCvtChars != 0) {
199 7 : *srcCvtChars = converted;
200 : }
201 7 : return destBufPtr - destBuf;
202 : }
203 :
204 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|