Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include <cstddef>
23 :
24 : #include "rtl/textcvt.h"
25 : #include "sal/types.h"
26 :
27 : #include "context.hxx"
28 : #include "converter.hxx"
29 : #include "convertsinglebytetobmpunicode.hxx"
30 : #include "unichars.hxx"
31 :
32 600 : sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
33 : void const * data, SAL_UNUSED_PARAMETER void *, sal_Char const * srcBuf,
34 : sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
35 : sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
36 : {
37 : sal_Unicode const * map = static_cast<
38 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
39 600 : data)->byteToUnicode;
40 600 : sal_uInt32 infoFlags = 0;
41 600 : sal_Size converted = 0;
42 600 : sal_Unicode * destBufPtr = destBuf;
43 600 : sal_Unicode * destBufEnd = destBuf + destChars;
44 1517 : for (; converted < srcBytes; ++converted) {
45 1145 : bool undefined = true;
46 1145 : sal_Char b = *srcBuf++;
47 1145 : sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
48 1145 : if (c == 0xFFFF) {
49 228 : goto bad_input;
50 : }
51 917 : if (destBufEnd - destBufPtr < 1) {
52 0 : goto no_output;
53 : }
54 917 : *destBufPtr++ = c;
55 917 : continue;
56 : bad_input:
57 228 : switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
58 : undefined, false, b, flags, &destBufPtr, destBufEnd,
59 228 : &infoFlags))
60 : {
61 : case sal::detail::textenc::BAD_INPUT_STOP:
62 228 : break;
63 :
64 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
65 0 : continue;
66 :
67 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
68 0 : goto no_output;
69 : }
70 228 : break;
71 : no_output:
72 0 : --srcBuf;
73 0 : infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
74 0 : break;
75 : }
76 600 : if (info != 0) {
77 600 : *info = infoFlags;
78 : }
79 600 : if (srcCvtBytes != 0) {
80 600 : *srcCvtBytes = converted;
81 : }
82 600 : return destBufPtr - destBuf;
83 : }
84 :
85 413 : sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
86 : void const * data, void * context,
87 : sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
88 : sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
89 : sal_Size * srcCvtChars)
90 : {
91 : std::size_t entries = static_cast<
92 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
93 413 : data)->unicodeToByteEntries;
94 : rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
95 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
96 413 : data)->unicodeToByte;
97 413 : sal_Unicode highSurrogate = 0;
98 413 : sal_uInt32 infoFlags = 0;
99 413 : sal_Size converted = 0;
100 413 : sal_Char * destBufPtr = destBuf;
101 413 : sal_Char * destBufEnd = destBuf + destBytes;
102 413 : if (context != 0) {
103 : highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
104 7 : m_nHighSurrogate;
105 : }
106 1379 : for (; converted < srcChars; ++converted) {
107 966 : bool undefined = true;
108 966 : sal_uInt32 c = *srcBuf++;
109 966 : if (highSurrogate == 0) {
110 966 : if (ImplIsHighSurrogate(c)) {
111 0 : highSurrogate = static_cast< sal_Unicode >(c);
112 0 : continue;
113 : }
114 0 : } else if (ImplIsLowSurrogate(c)) {
115 0 : c = ImplCombineSurrogates(highSurrogate, c);
116 : } else {
117 0 : undefined = false;
118 0 : goto bad_input;
119 : }
120 966 : if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
121 0 : undefined = false;
122 0 : goto bad_input;
123 : }
124 : // Linearly searching through the ranges if probably fastest, assuming
125 : // that most converted characters belong to the ASCII subset:
126 38229 : for (std::size_t i = 0; i < entries; ++i) {
127 38229 : if (c < ranges[i].unicode) {
128 0 : break;
129 38229 : } else if (c <= sal::static_int_cast< sal_uInt32 >(
130 38229 : ranges[i].unicode + ranges[i].range))
131 : {
132 966 : if (destBufEnd - destBufPtr < 1) {
133 0 : goto no_output;
134 : }
135 : *destBufPtr++ = static_cast< sal_Char >(
136 966 : ranges[i].byte + (c - ranges[i].unicode));
137 966 : goto done;
138 : }
139 : }
140 0 : goto bad_input;
141 : done:
142 966 : highSurrogate = 0;
143 966 : continue;
144 : bad_input:
145 0 : switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
146 : undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
147 0 : 0, 0))
148 : {
149 : case sal::detail::textenc::BAD_INPUT_STOP:
150 0 : highSurrogate = 0;
151 0 : break;
152 :
153 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
154 0 : highSurrogate = 0;
155 0 : continue;
156 :
157 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
158 0 : goto no_output;
159 : }
160 0 : break;
161 : no_output:
162 0 : --srcBuf;
163 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
164 0 : break;
165 : }
166 413 : if (highSurrogate != 0
167 0 : && ((infoFlags
168 0 : & (RTL_UNICODETOTEXT_INFO_ERROR
169 : | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
170 : == 0))
171 : {
172 0 : if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
173 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
174 : } else {
175 0 : switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
176 : false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
177 0 : 0, 0))
178 : {
179 : case sal::detail::textenc::BAD_INPUT_STOP:
180 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
181 0 : highSurrogate = 0;
182 0 : break;
183 :
184 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
185 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
186 0 : break;
187 : }
188 : }
189 : }
190 413 : if (context != 0) {
191 : static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
192 7 : = highSurrogate;
193 : }
194 413 : if (info != 0) {
195 413 : *info = infoFlags;
196 : }
197 413 : if (srcCvtChars != 0) {
198 413 : *srcCvtChars = converted;
199 : }
200 413 : return destBufPtr - destBuf;
201 : }
202 :
203 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|