Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * Version: MPL 1.1 / GPLv3+ / LGPLv3+
4 : *
5 : * The contents of this file are subject to the Mozilla Public License Version
6 : * 1.1 (the "License"); you may not use this file except in compliance with
7 : * the License or as specified alternatively below. You may obtain a copy of
8 : * the License at http://www.mozilla.org/MPL/
9 : *
10 : * Software distributed under the License is distributed on an "AS IS" basis,
11 : * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 : * for the specific language governing rights and limitations under the
13 : * License.
14 : *
15 : * Major Contributor(s):
16 : * [ Copyright (C) 2012 Red Hat, Inc., Stephan Bergmann <sbergman@redhat.com>
17 : * (initial developer) ]
18 : *
19 : * All Rights Reserved.
20 : *
21 : * For minor contributions see the git repository.
22 : *
23 : * Alternatively, the contents of this file may be used under the terms of
24 : * either the GNU General Public License Version 3 or later (the "GPLv3+"), or
25 : * the GNU Lesser General Public License Version 3 or later (the "LGPLv3+"),
26 : * in which case the provisions of the GPLv3+ or the LGPLv3+ are applicable
27 : * instead of those above.
28 : */
29 :
30 : #include "sal/config.h"
31 :
32 : #include "rtl/textcvt.h"
33 : #include "sal/types.h"
34 :
35 : #include "handleundefinedunicodetotextchar.hxx"
36 : #include "tenchelp.hxx"
37 :
38 : namespace {
39 :
40 378608 : bool ImplIsUnicodeIgnoreChar(sal_Unicode c, sal_uInt32 nFlags)
41 : {
42 : return
43 : ((nFlags & RTL_UNICODETOTEXT_FLAGS_NONSPACING_IGNORE) != 0
44 0 : && ImplIsZeroWidth(c))
45 : || ((nFlags & RTL_UNICODETOTEXT_FLAGS_CONTROL_IGNORE) != 0
46 0 : && ImplIsControlOrFormat(c))
47 : || ((nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_IGNORE) != 0
48 378608 : && ImplIsPrivateUse(c));
49 : }
50 :
51 5 : bool ImplGetUndefinedAsciiMultiByte(sal_uInt32 nFlags,
52 : char * pBuf,
53 : sal_Size nMaxLen)
54 : {
55 5 : if (nMaxLen == 0)
56 0 : return false;
57 5 : switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
58 : {
59 : case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_0:
60 0 : *pBuf = 0x00;
61 0 : break;
62 :
63 : case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_QUESTIONMARK:
64 : default: /* RTL_UNICODETOTEXT_FLAGS_UNDEFINED_DEFAULT */
65 5 : *pBuf = 0x3F;
66 5 : break;
67 :
68 : case RTL_UNICODETOTEXT_FLAGS_UNDEFINED_UNDERLINE:
69 0 : *pBuf = 0x5F;
70 0 : break;
71 : }
72 5 : return true;
73 : }
74 :
75 0 : bool ImplGetInvalidAsciiMultiByte(sal_uInt32 nFlags,
76 : char * pBuf,
77 : sal_Size nMaxLen)
78 : {
79 0 : if (nMaxLen == 0)
80 0 : return false;
81 0 : switch (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK)
82 : {
83 : case RTL_UNICODETOTEXT_FLAGS_INVALID_0:
84 0 : *pBuf = 0x00;
85 0 : break;
86 :
87 : case RTL_UNICODETOTEXT_FLAGS_INVALID_QUESTIONMARK:
88 : default: /* RTL_UNICODETOTEXT_FLAGS_INVALID_DEFAULT */
89 0 : *pBuf = 0x3F;
90 0 : break;
91 :
92 : case RTL_UNICODETOTEXT_FLAGS_INVALID_UNDERLINE:
93 0 : *pBuf = 0x5F;
94 0 : break;
95 : }
96 0 : return true;
97 : }
98 :
99 : }
100 :
101 378608 : bool sal::detail::textenc::handleUndefinedUnicodeToTextChar(
102 : sal_Unicode const ** ppSrcBuf, sal_Unicode const * pEndSrcBuf,
103 : char ** ppDestBuf, char const * pEndDestBuf, sal_uInt32 nFlags,
104 : sal_uInt32 * pInfo)
105 : {
106 378608 : sal_Unicode c = **ppSrcBuf;
107 :
108 : /* Should the private character map to one byte */
109 378608 : if ( (c >= RTL_TEXTCVT_BYTE_PRIVATE_START) && (c <= RTL_TEXTCVT_BYTE_PRIVATE_END) )
110 : {
111 0 : if ( nFlags & RTL_UNICODETOTEXT_FLAGS_PRIVATE_MAPTO0 )
112 : {
113 0 : **ppDestBuf = (char)(sal_uChar)(c-RTL_TEXTCVT_BYTE_PRIVATE_START);
114 0 : (*ppDestBuf)++;
115 0 : (*ppSrcBuf)++;
116 0 : return true;
117 : }
118 : }
119 :
120 : /* Should this character ignored (Private, Non Spacing, Control) */
121 378608 : if ( ImplIsUnicodeIgnoreChar( c, nFlags ) )
122 : {
123 0 : (*ppSrcBuf)++;
124 0 : return true;
125 : }
126 :
127 : /* Surrogates Characters should result in */
128 : /* one replacement character */
129 378608 : if (ImplIsHighSurrogate(c))
130 : {
131 15360 : if ( *ppSrcBuf == pEndSrcBuf )
132 : {
133 0 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR | RTL_UNICODETOTEXT_INFO_SRCBUFFERTOSMALL;
134 0 : return false;
135 : }
136 :
137 15360 : c = *((*ppSrcBuf)+1);
138 15360 : if (ImplIsLowSurrogate(c))
139 0 : (*ppSrcBuf)++;
140 : else
141 : {
142 15360 : *pInfo |= RTL_UNICODETOTEXT_INFO_INVALID;
143 15360 : if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR )
144 : {
145 15360 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
146 15360 : return false;
147 : }
148 0 : else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_INVALID_MASK) == RTL_UNICODETOTEXT_FLAGS_INVALID_IGNORE )
149 : {
150 0 : (*ppSrcBuf)++;
151 0 : return true;
152 : }
153 0 : else if (ImplGetInvalidAsciiMultiByte(nFlags,
154 : *ppDestBuf,
155 0 : pEndDestBuf - *ppDestBuf))
156 : {
157 0 : ++*ppSrcBuf;
158 0 : ++*ppDestBuf;
159 0 : return true;
160 : }
161 : else
162 : {
163 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
164 0 : | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
165 0 : return false;
166 : }
167 : }
168 : }
169 :
170 363248 : *pInfo |= RTL_UNICODETOTEXT_INFO_UNDEFINED;
171 363248 : if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR )
172 : {
173 362907 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR;
174 362907 : return false;
175 : }
176 341 : else if ( (nFlags & RTL_UNICODETOTEXT_FLAGS_UNDEFINED_MASK) == RTL_UNICODETOTEXT_FLAGS_UNDEFINED_IGNORE )
177 336 : (*ppSrcBuf)++;
178 5 : else if (ImplGetUndefinedAsciiMultiByte(nFlags,
179 : *ppDestBuf,
180 5 : pEndDestBuf - *ppDestBuf))
181 : {
182 5 : ++*ppSrcBuf;
183 5 : ++*ppDestBuf;
184 : }
185 : else
186 : {
187 : *pInfo |= RTL_UNICODETOTEXT_INFO_ERROR
188 0 : | RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL;
189 0 : return false;
190 : }
191 :
192 341 : return true;
193 : }
|