Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "surrogates.hxx"
21 :
22 : #include "osl/diagnose.h"
23 : #include "rtl/character.hxx"
24 : #include "rtl/strbuf.hxx"
25 : #include "rtl/textenc.h"
26 : #include "rtl/textcvt.h"
27 : #include "rtl/uri.h"
28 : #include "rtl/ustrbuf.h"
29 : #include "rtl/ustrbuf.hxx"
30 : #include "rtl/ustring.h"
31 : #include "rtl/ustring.hxx"
32 : #include "sal/types.h"
33 : #include "sal/macros.h"
34 :
35 : #include <algorithm>
36 : #include <cstddef>
37 :
38 : namespace {
39 :
40 : std::size_t const nCharClassSize = 128;
41 :
42 : sal_Unicode const cEscapePrefix = 0x25; // '%'
43 :
44 200232329 : inline bool isHighSurrogate(sal_uInt32 nUtf16)
45 : {
46 200232329 : return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
47 : }
48 :
49 0 : inline bool isLowSurrogate(sal_uInt32 nUtf16)
50 : {
51 0 : return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
52 : }
53 :
54 0 : inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
55 : {
56 0 : return SAL_RTL_COMBINE_SURROGATES(high, low);
57 : }
58 :
59 0 : inline int getHexWeight(sal_uInt32 nUtf32)
60 : {
61 0 : return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
62 0 : static_cast< int >(nUtf32 - 0x30) :
63 0 : nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
64 0 : static_cast< int >(nUtf32 - 0x41 + 10) :
65 0 : nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
66 0 : static_cast< int >(nUtf32 - 0x61 + 10) :
67 0 : -1; // not a hex digit
68 : }
69 :
70 69336703 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
71 : {
72 69336703 : return nUtf32 < nCharClassSize && pCharClass[nUtf32];
73 : }
74 :
75 200232329 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
76 : sal_Unicode cChar)
77 : {
78 200232329 : rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
79 200232329 : }
80 :
// Describes how readUcs4() obtained the value it returned.
enum EscapeType
{
    EscapeNo = 0,    // plain input, no escape sequence was consumed
    EscapeChar = 1,  // escape sequence(s) decoded to a complete character
    EscapeOctet = 2  // a single escaped octet that was not decoded further
};
87 :
88 : /* Read any of the following:
89 :
90 : - sequence of escape sequences representing character from eCharset,
91 : translated to single UCS4 character; or
92 :
93 : - pair of UTF-16 surrogates, translated to single UCS4 character; or
94 :
95 : _ single UTF-16 character, extended to UCS4 character.
96 : */
97 200232329 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
98 : bool bEncoded, rtl_TextEncoding eCharset,
99 : EscapeType * pType)
100 : {
101 200232329 : sal_uInt32 nChar = *(*pBegin)++;
102 : int nWeight1;
103 : int nWeight2;
104 200232329 : if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
105 0 : && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
106 200232329 : && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
107 : {
108 0 : *pBegin += 2;
109 0 : nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
110 0 : if (nChar <= 0x7F)
111 0 : *pType = EscapeChar;
112 0 : else if (eCharset == RTL_TEXTENCODING_UTF8)
113 : {
114 0 : if (nChar >= 0xC0 && nChar <= 0xF4)
115 : {
116 : sal_uInt32 nEncoded;
117 : int nShift;
118 : sal_uInt32 nMin;
119 0 : if (nChar <= 0xDF)
120 : {
121 0 : nEncoded = (nChar & 0x1F) << 6;
122 0 : nShift = 0;
123 0 : nMin = 0x80;
124 : }
125 0 : else if (nChar <= 0xEF)
126 : {
127 0 : nEncoded = (nChar & 0x0F) << 12;
128 0 : nShift = 6;
129 0 : nMin = 0x800;
130 : }
131 : else
132 : {
133 0 : nEncoded = (nChar & 0x07) << 18;
134 0 : nShift = 12;
135 0 : nMin = 0x10000;
136 : }
137 0 : sal_Unicode const * p = *pBegin;
138 0 : bool bUTF8 = true;
139 0 : for (; nShift >= 0; nShift -= 6)
140 : {
141 0 : if (pEnd - p < 3 || p[0] != cEscapePrefix
142 0 : || (nWeight1 = getHexWeight(p[1])) < 8
143 0 : || nWeight1 > 11
144 0 : || (nWeight2 = getHexWeight(p[2])) < 0)
145 : {
146 0 : bUTF8 = false;
147 0 : break;
148 : }
149 0 : p += 3;
150 0 : nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
151 : }
152 0 : if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
153 0 : && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
154 : {
155 0 : *pBegin = p;
156 0 : *pType = EscapeChar;
157 0 : return nEncoded;
158 : }
159 : }
160 0 : *pType = EscapeOctet;
161 : }
162 : else
163 : {
164 0 : rtl::OStringBuffer aBuf;
165 0 : aBuf.append(static_cast< char >(nChar));
166 : rtl_TextToUnicodeConverter aConverter
167 0 : = rtl_createTextToUnicodeConverter(eCharset);
168 0 : sal_Unicode const * p = *pBegin;
169 : for (;;)
170 : {
171 : sal_Unicode aDst[2];
172 : sal_uInt32 nInfo;
173 : sal_Size nConverted;
174 : sal_Size nDstSize = rtl_convertTextToUnicode(
175 0 : aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
176 : SAL_N_ELEMENTS( aDst ),
177 : (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
178 : | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
179 : | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
180 0 : &nInfo, &nConverted);
181 0 : if (nInfo == 0)
182 : {
183 : assert( nConverted
184 : == sal::static_int_cast< sal_uInt32 >(
185 : aBuf.getLength()));
186 0 : rtl_destroyTextToUnicodeConverter(aConverter);
187 0 : *pBegin = p;
188 0 : *pType = EscapeChar;
189 : assert( nDstSize == 1
190 : || (nDstSize == 2 && isHighSurrogate(aDst[0])
191 : && isLowSurrogate(aDst[1])));
192 : return nDstSize == 1
193 0 : ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
194 : }
195 0 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
196 0 : && pEnd - p >= 3 && p[0] == cEscapePrefix
197 0 : && (nWeight1 = getHexWeight(p[1])) >= 0
198 0 : && (nWeight2 = getHexWeight(p[2])) >= 0)
199 : {
200 0 : p += 3;
201 0 : aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
202 : }
203 0 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
204 0 : && p != pEnd && *p <= 0x7F)
205 : {
206 0 : aBuf.append(static_cast< char >(*p++));
207 : }
208 : else
209 : {
210 : assert(
211 : (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
212 : == 0);
213 0 : break;
214 : }
215 0 : }
216 0 : rtl_destroyTextToUnicodeConverter(aConverter);
217 0 : *pType = EscapeOctet;
218 : }
219 0 : return nChar;
220 : }
221 : else
222 : {
223 200232329 : *pType = EscapeNo;
224 200232329 : return isHighSurrogate(nChar) && *pBegin < pEnd
225 0 : && isLowSurrogate(**pBegin) ?
226 200232329 : combineSurrogates(nChar, *(*pBegin)++) : nChar;
227 : }
228 : }
229 :
230 130895626 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
231 : {
232 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
233 130895626 : if (nUtf32 <= 0xFFFF) {
234 : writeUnicode(
235 130895626 : pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
236 : } else {
237 0 : nUtf32 -= 0x10000;
238 : writeUnicode(
239 : pBuffer, pCapacity,
240 0 : static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
241 : writeUnicode(
242 : pBuffer, pCapacity,
243 0 : static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
244 : }
245 130895626 : }
246 :
247 0 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
248 : sal_uInt32 nOctet)
249 : {
250 : assert(nOctet <= 0xFF); // bad octet
251 :
252 : static sal_Unicode const aHex[16]
253 : = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
254 : 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
255 :
256 0 : writeUnicode(pBuffer, pCapacity, cEscapePrefix);
257 0 : writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
258 0 : writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
259 0 : }
260 :
261 0 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
262 : sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
263 : {
264 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
265 0 : if (eCharset == RTL_TEXTENCODING_UTF8) {
266 0 : if (nUtf32 < 0x80)
267 0 : writeEscapeOctet(pBuffer, pCapacity, nUtf32);
268 0 : else if (nUtf32 < 0x800)
269 : {
270 0 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
271 0 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
272 : }
273 0 : else if (nUtf32 < 0x10000)
274 : {
275 0 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
276 0 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
277 0 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
278 : }
279 : else
280 : {
281 0 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
282 0 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
283 0 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
284 0 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
285 : }
286 : } else {
287 : rtl_UnicodeToTextConverter aConverter
288 0 : = rtl_createUnicodeToTextConverter(eCharset);
289 : sal_Unicode aSrc[2];
290 : sal_Size nSrcSize;
291 0 : if (nUtf32 <= 0xFFFF)
292 : {
293 0 : aSrc[0] = static_cast< sal_Unicode >(nUtf32);
294 0 : nSrcSize = 1;
295 : }
296 : else
297 : {
298 : aSrc[0] = static_cast< sal_Unicode >(
299 0 : ((nUtf32 - 0x10000) >> 10) | 0xD800);
300 : aSrc[1] = static_cast< sal_Unicode >(
301 0 : ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
302 0 : nSrcSize = 2;
303 : }
304 : sal_Char aDst[32]; // FIXME random value
305 : sal_uInt32 nInfo;
306 : sal_Size nConverted;
307 : sal_Size nDstSize = rtl_convertUnicodeToText(
308 : aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
309 : RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
310 : | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
311 : | RTL_UNICODETOTEXT_FLAGS_FLUSH,
312 0 : &nInfo, &nConverted);
313 : assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
314 0 : rtl_destroyUnicodeToTextConverter(aConverter);
315 0 : if (nInfo == 0) {
316 : assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
317 0 : for (sal_Size i = 0; i < nDstSize; ++i)
318 : writeEscapeOctet(pBuffer, pCapacity,
319 0 : static_cast< unsigned char >(aDst[i]));
320 : // FIXME all octets are escaped, even if there is no need
321 : } else {
322 0 : if (bStrict) {
323 0 : return false;
324 : } else {
325 0 : writeUcs4(pBuffer, pCapacity, nUtf32);
326 : }
327 : }
328 : }
329 0 : return true;
330 : }
331 :
332 : struct Component
333 : {
334 : sal_Unicode const * pBegin;
335 : sal_Unicode const * pEnd;
336 :
337 213600 : inline Component(): pBegin(0), pEnd(0) {}
338 :
339 170876 : inline bool isPresent() const { return pBegin != 0; }
340 :
341 : inline sal_Int32 getLength() const;
342 : };
343 :
344 85234 : inline sal_Int32 Component::getLength() const
345 : {
346 : assert(isPresent()); // taking length of non-present component
347 85234 : return static_cast< sal_Int32 >(pEnd - pBegin);
348 : }
349 :
350 42720 : struct Components
351 : {
352 : Component aScheme;
353 : Component aAuthority;
354 : Component aPath;
355 : Component aQuery;
356 : Component aFragment;
357 : };
358 :
359 42720 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
360 : {
361 : // This algorithm is liberal and accepts various forms of illegal input.
362 :
363 42720 : sal_Unicode const * pBegin = pUriRef->buffer;
364 42720 : sal_Unicode const * pEnd = pBegin + pUriRef->length;
365 42720 : sal_Unicode const * pPos = pBegin;
366 :
367 42720 : if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
368 : {
369 173949 : for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
370 : {
371 173949 : if (*p == ':')
372 : {
373 42716 : pComponents->aScheme.pBegin = pBegin;
374 42716 : pComponents->aScheme.pEnd = ++p;
375 42716 : pPos = p;
376 42716 : break;
377 : }
378 263070 : else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
379 131837 : && *p != '.')
380 : {
381 4 : break;
382 : }
383 : }
384 : }
385 :
386 42720 : if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
387 : {
388 42514 : pComponents->aAuthority.pBegin = pPos;
389 42514 : pPos += 2;
390 85028 : while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
391 0 : ++pPos;
392 42514 : pComponents->aAuthority.pEnd = pPos;
393 : }
394 :
395 42720 : pComponents->aPath.pBegin = pPos;
396 2811893 : while (pPos != pEnd && *pPos != '?' && * pPos != '#')
397 2726453 : ++pPos;
398 42720 : pComponents->aPath.pEnd = pPos;
399 :
400 42720 : if (pPos != pEnd && *pPos == '?')
401 : {
402 0 : pComponents->aQuery.pBegin = pPos++;
403 0 : while (pPos != pEnd && * pPos != '#')
404 0 : ++pPos;
405 0 : pComponents->aQuery.pEnd = pPos;
406 : }
407 :
408 42720 : if (pPos != pEnd)
409 : {
410 : assert(*pPos == '#');
411 0 : pComponents->aFragment.pBegin = pPos;
412 0 : pComponents->aFragment.pEnd = pEnd;
413 : }
414 42720 : }
415 :
416 42720 : void appendPath(
417 : rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
418 : sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
419 : {
420 468463 : while (precedingSlash || pathBegin != pathEnd) {
421 383023 : sal_Unicode const * p = pathBegin;
422 3152133 : while (p != pathEnd && *p != '/') {
423 2386087 : ++p;
424 : }
425 383023 : std::size_t n = p - pathBegin;
426 383023 : if (n == 1 && pathBegin[0] == '.') {
427 : // input begins with "." -> remove from input (and done):
428 : // i.e., !precedingSlash -> !precedingSlash
429 : // input begins with "./" -> remove from input:
430 : // i.e., !precedingSlash -> !precedingSlash
431 : // input begins with "/." -> replace with "/" in input (and not yet
432 : // done):
433 : // i.e., precedingSlash -> precedingSlash
434 : // input begins with "/./" -> replace with "/" in input:
435 : // i.e., precedingSlash -> precedingSlash
436 383023 : } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
437 : // input begins with ".." -> remove from input (and done):
438 : // i.e., !precedingSlash -> !precedingSlash
439 : // input begins with "../" -> remove from input
440 : // i.e., !precedingSlash -> !precedingSlash
441 : // input begins with "/.." -> replace with "/" in input, and shrink
442 : // output (not not yet done):
443 : // i.e., precedingSlash -> precedingSlash
444 : // input begins with "/../" -> replace with "/" in input, and shrink
445 : // output:
446 : // i.e., precedingSlash -> precedingSlash
447 0 : if (precedingSlash) {
448 : buffer.truncate(
449 : bufferStart
450 : + std::max<sal_Int32>(
451 : rtl_ustr_lastIndexOfChar_WithLength(
452 0 : buffer.getStr() + bufferStart,
453 0 : buffer.getLength() - bufferStart, '/'),
454 0 : 0));
455 : }
456 : } else {
457 383023 : if (precedingSlash) {
458 340307 : buffer.append('/');
459 : }
460 383023 : buffer.append(pathBegin, n);
461 383023 : precedingSlash = p != pathEnd;
462 : }
463 383023 : pathBegin = p + (p == pathEnd ? 0 : 1);
464 : }
465 42720 : }
466 :
467 : }
468 :
469 0 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
470 : SAL_THROW_EXTERN_C()
471 : {
472 : static sal_Bool const aCharClass[][nCharClassSize]
473 : = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
474 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
476 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
477 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
478 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
479 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
480 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
481 : },
482 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
483 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
485 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
486 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
487 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
488 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
489 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
490 : },
491 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
492 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
494 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
495 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
496 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
497 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
498 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
499 : },
500 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
501 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
503 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
504 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
505 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
506 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
507 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
508 : },
509 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
510 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
512 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
513 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
514 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
515 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
516 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
517 : },
518 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
519 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
521 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
522 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
523 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
524 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
525 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
526 : },
527 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
528 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
530 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
531 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
532 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
533 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
534 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
535 : },
536 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
537 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
539 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
540 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
541 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
542 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
543 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
544 : }};
545 : assert(
546 : (eCharClass >= 0
547 : && (sal::static_int_cast< std::size_t >(eCharClass)
548 : < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
549 0 : return aCharClass[eCharClass];
550 : }
551 :
552 1615453 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
553 : rtl_UriEncodeMechanism eMechanism,
554 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
555 : SAL_THROW_EXTERN_C()
556 : {
557 : assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
558 :
559 1615453 : sal_Unicode const * p = pText->buffer;
560 1615453 : sal_Unicode const * pEnd = p + pText->length;
561 1615453 : sal_Int32 nCapacity = pText->length;
562 1615453 : rtl_uString_new_WithLength(pResult, nCapacity);
563 1615453 : while (p < pEnd)
564 : {
565 : EscapeType eType;
566 : sal_uInt32 nUtf32 = readUcs4(
567 : &p, pEnd,
568 : (eMechanism == rtl_UriEncodeKeepEscapes
569 69336703 : || eMechanism == rtl_UriEncodeCheckEscapes
570 138673406 : || eMechanism == rtl_UriEncodeStrictKeepEscapes),
571 138673406 : eCharset, &eType);
572 69336703 : switch (eType)
573 : {
574 : case EscapeNo:
575 69336703 : if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
576 : writeUnicode(pResult, &nCapacity,
577 69336703 : static_cast< sal_Unicode >(nUtf32));
578 0 : else if (!writeEscapeChar(
579 : pResult, &nCapacity, nUtf32, eCharset,
580 : (eMechanism == rtl_UriEncodeStrict
581 0 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
582 : {
583 0 : rtl_uString_new(pResult);
584 0 : return;
585 : }
586 69336703 : break;
587 :
588 : case EscapeChar:
589 0 : if (eMechanism == rtl_UriEncodeCheckEscapes
590 0 : && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
591 : writeUnicode(pResult, &nCapacity,
592 0 : static_cast< sal_Unicode >(nUtf32));
593 0 : else if (!writeEscapeChar(
594 : pResult, &nCapacity, nUtf32, eCharset,
595 : (eMechanism == rtl_UriEncodeStrict
596 0 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
597 : {
598 0 : rtl_uString_new(pResult);
599 0 : return;
600 : }
601 0 : break;
602 :
603 : case EscapeOctet:
604 0 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
605 0 : break;
606 : }
607 : }
608 1615453 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
609 : }
610 :
611 2042182 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
612 : rtl_UriDecodeMechanism eMechanism,
613 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
614 : SAL_THROW_EXTERN_C()
615 : {
616 2042182 : switch (eMechanism)
617 : {
618 : case rtl_UriDecodeNone:
619 0 : rtl_uString_assign(pResult, pText);
620 0 : break;
621 :
622 : case rtl_UriDecodeToIuri:
623 0 : eCharset = RTL_TEXTENCODING_UTF8;
624 : //fall-through
625 : default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
626 : {
627 2042182 : sal_Unicode const * p = pText->buffer;
628 2042182 : sal_Unicode const * pEnd = p + pText->length;
629 2042182 : sal_Int32 nCapacity = pText->length;
630 2042182 : rtl_uString_new_WithLength(pResult, nCapacity);
631 134979990 : while (p < pEnd)
632 : {
633 : EscapeType eType;
634 130895626 : sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
635 130895626 : switch (eType)
636 : {
637 : case EscapeChar:
638 0 : if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
639 : {
640 0 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
641 0 : break;
642 : }
643 : case EscapeNo:
644 130895626 : writeUcs4(pResult, &nCapacity, nUtf32);
645 130895626 : break;
646 :
647 : case EscapeOctet:
648 0 : if (eMechanism == rtl_UriDecodeStrict) {
649 0 : rtl_uString_new(pResult);
650 2042182 : return;
651 : }
652 0 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
653 0 : break;
654 : }
655 : }
656 2042182 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
657 : }
658 2042182 : break;
659 : }
660 : }
661 :
662 42716 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
663 : rtl_uString * pRelUriRef,
664 : rtl_uString ** pResult,
665 : rtl_uString ** pException)
666 : SAL_THROW_EXTERN_C()
667 : {
668 : // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
669 : // relative URI into an absolute one:
670 42716 : rtl::OUStringBuffer aBuffer;
671 42716 : Components aRelComponents;
672 42716 : parseUriRef(pRelUriRef, &aRelComponents);
673 42716 : if (aRelComponents.aScheme.isPresent())
674 : {
675 : aBuffer.append(aRelComponents.aScheme.pBegin,
676 42712 : aRelComponents.aScheme.getLength());
677 42712 : if (aRelComponents.aAuthority.isPresent())
678 : aBuffer.append(aRelComponents.aAuthority.pBegin,
679 42510 : aRelComponents.aAuthority.getLength());
680 : appendPath(
681 : aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
682 42712 : aRelComponents.aPath.pEnd);
683 42712 : if (aRelComponents.aQuery.isPresent())
684 : aBuffer.append(aRelComponents.aQuery.pBegin,
685 0 : aRelComponents.aQuery.getLength());
686 : }
687 : else
688 : {
689 4 : Components aBaseComponents;
690 4 : parseUriRef(pBaseUriRef, &aBaseComponents);
691 4 : if (!aBaseComponents.aScheme.isPresent())
692 : {
693 : rtl_uString_assign(
694 : pException,
695 : (rtl::OUString(
696 0 : "<" + rtl::OUString(pBaseUriRef)
697 0 : + "> does not start with a scheme component")
698 0 : .pData));
699 0 : return false;
700 : }
701 : aBuffer.append(aBaseComponents.aScheme.pBegin,
702 4 : aBaseComponents.aScheme.getLength());
703 4 : if (aRelComponents.aAuthority.isPresent())
704 : {
705 : aBuffer.append(aRelComponents.aAuthority.pBegin,
706 0 : aRelComponents.aAuthority.getLength());
707 : appendPath(
708 : aBuffer, aBuffer.getLength(), false,
709 0 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
710 0 : if (aRelComponents.aQuery.isPresent())
711 : aBuffer.append(aRelComponents.aQuery.pBegin,
712 0 : aRelComponents.aQuery.getLength());
713 : }
714 : else
715 : {
716 4 : if (aBaseComponents.aAuthority.isPresent())
717 : aBuffer.append(aBaseComponents.aAuthority.pBegin,
718 4 : aBaseComponents.aAuthority.getLength());
719 4 : if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
720 : {
721 : aBuffer.append(aBaseComponents.aPath.pBegin,
722 0 : aBaseComponents.aPath.getLength());
723 0 : if (aRelComponents.aQuery.isPresent())
724 : aBuffer.append(aRelComponents.aQuery.pBegin,
725 0 : aRelComponents.aQuery.getLength());
726 0 : else if (aBaseComponents.aQuery.isPresent())
727 : aBuffer.append(aBaseComponents.aQuery.pBegin,
728 0 : aBaseComponents.aQuery.getLength());
729 : }
730 : else
731 : {
732 4 : if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
733 4 : && *aRelComponents.aPath.pBegin == '/')
734 : appendPath(
735 : aBuffer, aBuffer.getLength(), false,
736 0 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
737 8 : else if (aBaseComponents.aAuthority.isPresent()
738 8 : && aBaseComponents.aPath.pBegin
739 4 : == aBaseComponents.aPath.pEnd)
740 : appendPath(
741 : aBuffer, aBuffer.getLength(), true,
742 0 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
743 : else
744 : {
745 4 : sal_Int32 n = aBuffer.getLength();
746 : sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
747 : aBaseComponents.aPath.pBegin,
748 4 : aBaseComponents.aPath.getLength(), '/');
749 4 : if (i >= 0) {
750 : appendPath(
751 : aBuffer, n, false, aBaseComponents.aPath.pBegin,
752 4 : aBaseComponents.aPath.pBegin + i);
753 : }
754 : appendPath(
755 : aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
756 4 : aRelComponents.aPath.pEnd);
757 : }
758 4 : if (aRelComponents.aQuery.isPresent())
759 : aBuffer.append(aRelComponents.aQuery.pBegin,
760 0 : aRelComponents.aQuery.getLength());
761 : }
762 : }
763 : }
764 42716 : if (aRelComponents.aFragment.isPresent())
765 : aBuffer.append(aRelComponents.aFragment.pBegin,
766 0 : aRelComponents.aFragment.getLength());
767 42716 : rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
768 42716 : return true;
769 : }
770 :
771 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|