Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "sal/config.h"
21 :
22 : #include <cstddef>
23 :
24 : #include "osl/diagnose.h"
25 : #include "rtl/textcvt.h"
26 : #include "sal/types.h"
27 :
28 : #include "context.hxx"
29 : #include "converter.hxx"
30 : #include "convertsinglebytetobmpunicode.hxx"
31 : #include "unichars.hxx"
32 :
/*
 * Converts single-byte text to sal_Unicode values by table lookup.
 *
 * data               points to a
 *                    rtl::textenc::BmpUnicodeToSingleByteConverterData; only
 *                    its byteToUnicode table is read here.
 * (second parameter) unnamed and marked SAL_UNUSED_PARAMETER.
 * srcBuf, srcBytes   input bytes and their count.
 * destBuf, destChars output buffer and its capacity in sal_Unicode units.
 * flags              forwarded unchanged to the bad-input handler.
 * info               if non-null, receives the info flags accumulated here.
 * srcCvtBytes        if non-null, receives the number of source bytes that
 *                    were counted as converted.
 *
 * Returns the number of sal_Unicode units written to destBuf.
 */
33 1200 : sal_Size rtl_textenc_convertSingleByteToBmpUnicode(
34 : void const * data, SAL_UNUSED_PARAMETER void *, sal_Char const * srcBuf,
35 : sal_Size srcBytes, sal_Unicode * destBuf, sal_Size destChars,
36 : sal_uInt32 flags, sal_uInt32 * info, sal_Size * srcCvtBytes)
37 : {
// Lookup table indexed by the source byte value (see the sal_uInt8 cast below).
38 : sal_Unicode const * map = static_cast<
39 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
40 1200 : data)->byteToUnicode;
41 1200 : sal_uInt32 infoFlags = 0;
42 1200 : sal_Size converted = 0;
43 1200 : sal_Unicode * destBufPtr = destBuf;
44 1200 : sal_Unicode * destBufEnd = destBuf + destChars;
// `converted` is only incremented when an iteration ends via `continue`;
// every `break` below leaves the current byte uncounted.
45 3034 : for (; converted < srcBytes; ++converted) {
// Never modified in this function; passed as the first handler argument.
46 2290 : bool undefined = true;
47 2290 : sal_Char b = *srcBuf++;
// Cast to sal_uInt8 so the byte is used as a non-negative table index.
48 2290 : sal_Unicode c = map[static_cast< sal_uInt8 >(b)];
// A table entry of 0xFFFF is treated as bad input.
49 2290 : if (c == 0xFFFF) {
50 456 : goto bad_input;
51 : }
// Need room for one output unit before storing.
52 1834 : if (destBufEnd - destBufPtr < 1) {
53 0 : goto no_output;
54 : }
55 1834 : *destBufPtr++ = c;
56 1834 : continue;
57 : bad_input:
// The handler's return value selects how to proceed; it receives pointers to
// destBufPtr and infoFlags, so it may update both.
58 456 : switch (sal::detail::textenc::handleBadInputTextToUnicodeConversion(
59 : undefined, false, b, flags, &destBufPtr, destBufEnd,
60 456 : &infoFlags))
61 : {
62 : case sal::detail::textenc::BAD_INPUT_STOP:
// Leaves the switch only; the `break` after the switch then ends the loop.
63 456 : break;
64 :
65 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
// Resumes the for loop, so the offending byte is counted as converted.
66 0 : continue;
67 :
68 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
69 0 : goto no_output;
70 : }
71 456 : break;
72 : no_output:
// Step back over the byte just read and report the full destination buffer.
// srcBuf is not read again after the loop in this function.
73 0 : --srcBuf;
74 0 : infoFlags |= RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL;
75 0 : break;
76 : }
// Both out-parameters are optional.
77 1200 : if (info != 0) {
78 1200 : *info = infoFlags;
79 : }
80 1200 : if (srcCvtBytes != 0) {
81 1200 : *srcCvtBytes = converted;
82 : }
83 1200 : return destBufPtr - destBuf;
84 : }
85 :
/*
 * Converts sal_Unicode text to single-byte text using a table of ranges.
 *
 * data               points to a
 *                    rtl::textenc::BmpUnicodeToSingleByteConverterData; its
 *                    unicodeToByte range array and unicodeToByteEntries count
 *                    are read here.
 * context            if non-null, treated as an ImplUnicodeToTextContext:
 *                    m_nHighSurrogate is read on entry and written on exit.
 * srcBuf, srcChars   input sal_Unicode units and their count.
 * destBuf, destBytes output buffer and its capacity in bytes.
 * flags              forwarded to the bad-input handler; also tested for
 *                    RTL_UNICODETOTEXT_FLAGS_FLUSH after the loop.
 * info               if non-null, receives the info flags accumulated here.
 * srcCvtChars        if non-null, receives the number of source units that
 *                    were counted as converted.
 *
 * Returns the number of bytes written to destBuf.
 */
86 856 : sal_Size rtl_textenc_convertBmpUnicodeToSingleByte(
87 : void const * data, void * context,
88 : sal_Unicode const * srcBuf, sal_Size srcChars, sal_Char * destBuf,
89 : sal_Size destBytes, sal_uInt32 flags, sal_uInt32 * info,
90 : sal_Size * srcCvtChars)
91 : {
92 : std::size_t entries = static_cast<
93 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
94 856 : data)->unicodeToByteEntries;
95 : rtl::textenc::BmpUnicodeToSingleByteRange const * ranges = static_cast<
96 : rtl::textenc::BmpUnicodeToSingleByteConverterData const * >(
97 856 : data)->unicodeToByte;
// Non-zero while a high surrogate has been read and its partner is awaited.
98 856 : sal_Unicode highSurrogate = 0;
99 856 : sal_uInt32 infoFlags = 0;
100 856 : sal_Size converted = 0;
101 856 : sal_Char * destBufPtr = destBuf;
102 856 : sal_Char * destBufEnd = destBuf + destBytes;
// Pick up a high surrogate stored in the context (written back at the end).
103 856 : if (context != 0) {
104 : highSurrogate = static_cast< ImplUnicodeToTextContext * >(context)->
105 14 : m_nHighSurrogate;
106 : }
// `converted` is only incremented when an iteration ends via `continue`;
// every `break` out of the loop leaves the current unit uncounted.
107 2818 : for (; converted < srcChars; ++converted) {
// Handed to the bad-input handler; cleared below for surrogate and
// noncharacter problems, left true when no range matches.
108 1962 : bool undefined = true;
109 1962 : sal_uInt32 c = *srcBuf++;
110 1962 : if (highSurrogate == 0) {
111 1962 : if (ImplIsHighSurrogate(c)) {
// Remember it and wait for the next unit; nothing is emitted yet.
112 0 : highSurrogate = static_cast< sal_Unicode >(c);
113 0 : continue;
114 : }
115 0 : } else if (ImplIsLowSurrogate(c)) {
// Pending high surrogate followed by a low one: combine the pair.
116 0 : c = ImplCombineSurrogates(highSurrogate, c);
117 : } else {
// Pending high surrogate not followed by a low surrogate.
118 0 : undefined = false;
119 0 : goto bad_input;
120 : }
// A low surrogate with no pending high surrogate, or a noncharacter.
121 1962 : if (ImplIsLowSurrogate(c) || ImplIsNoncharacter(c)) {
122 0 : undefined = false;
123 0 : goto bad_input;
124 : }
125 : // Linearly searching through the ranges is probably fastest, assuming
126 : // that most converted characters belong to the ASCII subset:
// The early `break` on c < ranges[i].unicode presumably relies on the ranges
// being sorted by ascending `unicode` -- verify against the table definitions.
127 77836 : for (std::size_t i = 0; i < entries; ++i) {
128 77836 : if (c < ranges[i].unicode) {
129 0 : break;
130 77836 : } else if (c <= sal::static_int_cast< sal_uInt32 >(
131 77836 : ranges[i].unicode + ranges[i].range))
132 : {
// c lies in [unicode, unicode + range]: emit `byte` plus the offset of c
// within the range, provided one output byte is available.
133 1962 : if (destBufEnd - destBufPtr < 1) {
134 0 : goto no_output;
135 : }
136 : *destBufPtr++ = static_cast< sal_Char >(
137 1962 : ranges[i].byte + (c - ranges[i].unicode));
138 1962 : goto done;
139 : }
140 : }
// No range contains c; `undefined` is still true at this point.
141 0 : goto bad_input;
142 : done:
143 1962 : highSurrogate = 0;
144 1962 : continue;
145 : bad_input:
// The handler's return value selects how to proceed; it receives pointers to
// destBufPtr and infoFlags, so it may update both.
146 0 : switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
147 : undefined, c, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
148 0 : 0, 0))
149 : {
150 : case sal::detail::textenc::BAD_INPUT_STOP:
// Drop any pending surrogate; the `break` after the switch ends the loop.
151 0 : highSurrogate = 0;
152 0 : break;
153 :
154 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
// Drop any pending surrogate and resume; the unit is counted as converted.
155 0 : highSurrogate = 0;
156 0 : continue;
157 :
158 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
159 0 : goto no_output;
160 : }
161 0 : break;
162 : no_output:
// Step back over the unit just read and report the full destination buffer.
// highSurrogate is left unchanged on this path; srcBuf is not read again
// after the loop in this function.
163 0 : --srcBuf;
164 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
165 0 : break;
166 : }
// A high surrogate is still pending and neither an error nor a full
// destination buffer has been recorded.
// NOTE(review): with FLUSH set only SRCBUFFERTOSMALL is raised, while without
// FLUSH the bad-input handler runs -- confirm this is the intended polarity.
167 856 : if (highSurrogate != 0
168 0 : && ((infoFlags
169 0 : & (RTL_UNICODETOTEXT_INFO_ERROR
170 : | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL))
171 : == 0))
172 : {
173 0 : if ((flags & RTL_UNICODETOTEXT_FLAGS_FLUSH) != 0) {
174 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
175 : } else {
176 0 : switch (sal::detail::textenc::handleBadInputUnicodeToTextConversion(
177 : false, 0, flags, &destBufPtr, destBufEnd, &infoFlags, 0,
178 0 : 0, 0))
179 : {
180 : case sal::detail::textenc::BAD_INPUT_STOP:
181 : case sal::detail::textenc::BAD_INPUT_CONTINUE:
182 0 : highSurrogate = 0;
183 0 : break;
184 :
185 : case sal::detail::textenc::BAD_INPUT_NO_OUTPUT:
186 0 : infoFlags |= RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
187 0 : break;
188 : }
189 : }
190 : }
// Store the (possibly still pending) high surrogate back into the context.
191 856 : if (context != 0) {
192 : static_cast< ImplUnicodeToTextContext * >(context)->m_nHighSurrogate
193 14 : = highSurrogate;
194 : }
// Both out-parameters are optional.
195 856 : if (info != 0) {
196 856 : *info = infoFlags;
197 : }
198 856 : if (srcCvtChars != 0) {
199 856 : *srcCvtChars = converted;
200 : }
201 856 : return destBufPtr - destBuf;
202 : }
203 :
204 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|