Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "surrogates.hxx"
21 :
22 : #include "osl/diagnose.h"
23 : #include "rtl/character.hxx"
24 : #include "rtl/strbuf.hxx"
25 : #include "rtl/textenc.h"
26 : #include "rtl/textcvt.h"
27 : #include "rtl/uri.h"
28 : #include "rtl/ustrbuf.h"
29 : #include "rtl/ustrbuf.hxx"
30 : #include "rtl/ustring.h"
31 : #include "rtl/ustring.hxx"
32 : #include "sal/types.h"
33 : #include "sal/macros.h"
34 :
35 : #include <cstddef>
36 :
37 : namespace {
38 :
39 : std::size_t const nCharClassSize = 128;
40 :
41 : sal_Unicode const cEscapePrefix = 0x25; // '%'
42 :
43 97834651 : inline bool isHighSurrogate(sal_uInt32 nUtf16)
44 : {
45 97834651 : return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
46 : }
47 :
48 22 : inline bool isLowSurrogate(sal_uInt32 nUtf16)
49 : {
50 22 : return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
51 : }
52 :
53 5 : inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
54 : {
55 5 : return SAL_RTL_COMBINE_SURROGATES(high, low);
56 : }
57 :
58 43012 : inline int getHexWeight(sal_uInt32 nUtf32)
59 : {
60 43011 : return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
61 42176 : static_cast< int >(nUtf32 - 0x30) :
62 835 : nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
63 781 : static_cast< int >(nUtf32 - 0x41 + 10) :
64 54 : nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
65 54 : static_cast< int >(nUtf32 - 0x61 + 10) :
66 86023 : -1; // not a hex digit
67 : }
68 :
69 28039236 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
70 : {
71 28039236 : return nUtf32 < nCharClassSize && pCharClass[nUtf32];
72 : }
73 :
74 97896585 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
75 : sal_Unicode cChar)
76 : {
77 97896585 : rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
78 97896585 : }
79 :
// Classifies what readUcs4 consumed from the input.
enum EscapeType
{
    // A literal character (or surrogate pair); no escape sequence was decoded.
    EscapeNo,
    // One or more "%XX" escape sequences that were decoded to a character.
    EscapeChar,
    // A single "%XX" escape sequence returned as a raw octet, because it could
    // not be decoded to a character in the given charset.
    EscapeOctet
};
86 :
87 : /* Read any of the following:
88 :
89 : - sequence of escape sequences representing character from eCharset,
90 : translated to single UCS4 character; or
91 :
92 : - pair of UTF-16 surrogates, translated to single UCS4 character; or
93 :
94 : _ single UTF-16 character, extended to UCS4 character.
95 : */
96 97856085 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
97 : bool bEncoded, rtl_TextEncoding eCharset,
98 : EscapeType * pType)
99 : {
100 97856085 : sal_uInt32 nChar = *(*pBegin)++;
101 : int nWeight1;
102 : int nWeight2;
103 97877552 : if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
104 21455 : && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
105 97877539 : && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
106 : {
107 21454 : *pBegin += 2;
108 21454 : nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
109 21454 : if (nChar <= 0x7F)
110 21405 : *pType = EscapeChar;
111 49 : else if (eCharset == RTL_TEXTENCODING_UTF8)
112 : {
113 39 : if (nChar >= 0xC0 && nChar <= 0xF4)
114 : {
115 : sal_uInt32 nEncoded;
116 : int nShift;
117 : sal_uInt32 nMin;
118 22 : if (nChar <= 0xDF)
119 : {
120 4 : nEncoded = (nChar & 0x1F) << 6;
121 4 : nShift = 0;
122 4 : nMin = 0x80;
123 : }
124 18 : else if (nChar <= 0xEF)
125 : {
126 16 : nEncoded = (nChar & 0x0F) << 12;
127 16 : nShift = 6;
128 16 : nMin = 0x800;
129 : }
130 : else
131 : {
132 2 : nEncoded = (nChar & 0x07) << 18;
133 2 : nShift = 12;
134 2 : nMin = 0x10000;
135 : }
136 22 : sal_Unicode const * p = *pBegin;
137 22 : bool bUTF8 = true;
138 62 : for (; nShift >= 0; nShift -= 6)
139 : {
140 123 : if (pEnd - p < 3 || p[0] != cEscapePrefix
141 41 : || (nWeight1 = getHexWeight(p[1])) < 8
142 41 : || nWeight1 > 11
143 81 : || (nWeight2 = getHexWeight(p[2])) < 0)
144 : {
145 1 : bUTF8 = sal_False;
146 1 : break;
147 : }
148 40 : p += 3;
149 40 : nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
150 : }
151 43 : if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
152 37 : && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
153 : {
154 13 : *pBegin = p;
155 13 : *pType = EscapeChar;
156 13 : return nEncoded;
157 : }
158 : }
159 26 : *pType = EscapeOctet;
160 : }
161 : else
162 : {
163 10 : rtl::OStringBuffer aBuf;
164 10 : aBuf.append(static_cast< char >(nChar));
165 : rtl_TextToUnicodeConverter aConverter
166 10 : = rtl_createTextToUnicodeConverter(eCharset);
167 10 : sal_Unicode const * p = *pBegin;
168 : for (;;)
169 : {
170 : sal_Unicode aDst[2];
171 : sal_uInt32 nInfo;
172 : sal_Size nConverted;
173 : sal_Size nDstSize = rtl_convertTextToUnicode(
174 24 : aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
175 : SAL_N_ELEMENTS( aDst ),
176 : (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
177 : | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
178 : | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
179 24 : &nInfo, &nConverted);
180 24 : if (nInfo == 0)
181 : {
182 : assert( nConverted
183 : == sal::static_int_cast< sal_uInt32 >(
184 : aBuf.getLength()));
185 8 : rtl_destroyTextToUnicodeConverter(aConverter);
186 8 : *pBegin = p;
187 8 : *pType = EscapeChar;
188 : assert( nDstSize == 1
189 : || (nDstSize == 2 && isHighSurrogate(aDst[0])
190 : && isLowSurrogate(aDst[1])));
191 : return nDstSize == 1
192 8 : ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
193 : }
194 32 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
195 14 : && pEnd - p >= 3 && p[0] == cEscapePrefix
196 11 : && (nWeight1 = getHexWeight(p[1])) >= 0
197 27 : && (nWeight2 = getHexWeight(p[2])) >= 0)
198 : {
199 11 : p += 3;
200 11 : aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
201 : }
202 5 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
203 3 : && p != pEnd && *p <= 0x7F)
204 : {
205 3 : aBuf.append(static_cast< char >(*p++));
206 : }
207 : else
208 : {
209 : assert(
210 : (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
211 : == 0);
212 2 : break;
213 : }
214 14 : }
215 2 : rtl_destroyTextToUnicodeConverter(aConverter);
216 2 : *pType = EscapeOctet;
217 : }
218 21433 : return nChar;
219 : }
220 : else
221 : {
222 97834631 : *pType = EscapeNo;
223 97834638 : return isHighSurrogate(nChar) && *pBegin < pEnd
224 7 : && isLowSurrogate(**pBegin) ?
225 97834635 : combineSurrogates(nChar, *(*pBegin)++) : nChar;
226 : }
227 : }
228 :
229 69816764 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
230 : {
231 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
232 69816764 : if (nUtf32 <= 0xFFFF) {
233 : writeUnicode(
234 69816761 : pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
235 : } else {
236 3 : nUtf32 -= 0x10000;
237 : writeUnicode(
238 : pBuffer, pCapacity,
239 3 : static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
240 : writeUnicode(
241 : pBuffer, pCapacity,
242 3 : static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
243 : }
244 69816764 : }
245 :
246 20235 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
247 : sal_uInt32 nOctet)
248 : {
249 : assert(nOctet <= 0xFF); // bad octet
250 :
251 : static sal_Unicode const aHex[16]
252 : = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
253 : 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
254 :
255 20235 : writeUnicode(pBuffer, pCapacity, cEscapePrefix);
256 20235 : writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
257 20235 : writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
258 20235 : }
259 :
260 20180 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
261 : sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
262 : {
263 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
264 20180 : if (eCharset == RTL_TEXTENCODING_UTF8) {
265 6080 : if (nUtf32 < 0x80)
266 6067 : writeEscapeOctet(pBuffer, pCapacity, nUtf32);
267 13 : else if (nUtf32 < 0x800)
268 : {
269 3 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
270 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
271 : }
272 10 : else if (nUtf32 < 0x10000)
273 : {
274 7 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
275 7 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
276 7 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
277 : }
278 : else
279 : {
280 3 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
281 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
282 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
283 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
284 : }
285 : } else {
286 : rtl_UnicodeToTextConverter aConverter
287 14100 : = rtl_createUnicodeToTextConverter(eCharset);
288 : sal_Unicode aSrc[2];
289 : sal_Size nSrcSize;
290 14100 : if (nUtf32 <= 0xFFFF)
291 : {
292 14099 : aSrc[0] = static_cast< sal_Unicode >(nUtf32);
293 14099 : nSrcSize = 1;
294 : }
295 : else
296 : {
297 : aSrc[0] = static_cast< sal_Unicode >(
298 1 : ((nUtf32 - 0x10000) >> 10) | 0xD800);
299 : aSrc[1] = static_cast< sal_Unicode >(
300 1 : ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
301 1 : nSrcSize = 2;
302 : }
303 : sal_Char aDst[32]; // FIXME random value
304 : sal_uInt32 nInfo;
305 : sal_Size nConverted;
306 : sal_Size nDstSize = rtl_convertUnicodeToText(
307 : aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
308 : RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
309 : | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
310 : | RTL_UNICODETOTEXT_FLAGS_FLUSH,
311 14100 : &nInfo, &nConverted);
312 : assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
313 14100 : rtl_destroyUnicodeToTextConverter(aConverter);
314 14100 : if (nInfo == 0) {
315 : assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
316 28200 : for (sal_Size i = 0; i < nDstSize; ++i)
317 : writeEscapeOctet(pBuffer, pCapacity,
318 14103 : static_cast< unsigned char >(aDst[i]));
319 : // FIXME all octets are escaped, even if there is no need
320 : } else {
321 3 : if (bStrict) {
322 2 : return false;
323 : } else {
324 1 : writeUcs4(pBuffer, pCapacity, nUtf32);
325 : }
326 : }
327 : }
328 20178 : return true;
329 : }
330 :
331 : struct Component
332 : {
333 : sal_Unicode const * pBegin;
334 : sal_Unicode const * pEnd;
335 :
336 339755 : inline Component(): pBegin(0), pEnd(0) {}
337 :
338 153817 : inline bool isPresent() const { return pBegin != 0; }
339 :
340 : inline sal_Int32 getLength() const;
341 : };
342 :
343 85872 : inline sal_Int32 Component::getLength() const
344 : {
345 : assert(isPresent()); // taking length of non-present component
346 85872 : return static_cast< sal_Int32 >(pEnd - pBegin);
347 : }
348 :
349 67951 : struct Components
350 : {
351 : Component aScheme;
352 : Component aAuthority;
353 : Component aPath;
354 : Component aQuery;
355 : Component aFragment;
356 : };
357 :
358 67951 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
359 : {
360 : // This algorithm is liberal and accepts various forms of illegal input.
361 :
362 67951 : sal_Unicode const * pBegin = pUriRef->buffer;
363 67951 : sal_Unicode const * pEnd = pBegin + pUriRef->length;
364 67951 : sal_Unicode const * pPos = pBegin;
365 :
366 67951 : if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
367 : {
368 747175 : for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
369 : {
370 736812 : if (*p == ':')
371 : {
372 46482 : pComponents->aScheme.pBegin = pBegin;
373 46482 : pComponents->aScheme.pEnd = ++p;
374 46482 : pPos = p;
375 46482 : break;
376 : }
377 1474520 : else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
378 784190 : && *p != '.')
379 : {
380 11028 : break;
381 : }
382 : }
383 : }
384 :
385 67951 : if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
386 : {
387 22151 : pComponents->aAuthority.pBegin = pPos;
388 22151 : pPos += 2;
389 44346 : while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
390 44 : ++pPos;
391 22151 : pComponents->aAuthority.pEnd = pPos;
392 : }
393 :
394 67951 : pComponents->aPath.pBegin = pPos;
395 2763486 : while (pPos != pEnd && *pPos != '?' && * pPos != '#')
396 2627584 : ++pPos;
397 67951 : pComponents->aPath.pEnd = pPos;
398 :
399 67951 : if (pPos != pEnd && *pPos == '?')
400 : {
401 48 : pComponents->aQuery.pBegin = pPos++;
402 153 : while (pPos != pEnd && * pPos != '#')
403 57 : ++pPos;
404 48 : pComponents->aQuery.pEnd = pPos;
405 : }
406 :
407 67951 : if (pPos != pEnd)
408 : {
409 : assert(*pPos == '#');
410 6 : pComponents->aFragment.pBegin = pPos;
411 6 : pComponents->aFragment.pEnd = pEnd;
412 : }
413 67951 : }
414 :
415 21460 : rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
416 : {
417 : assert(rBasePath.isPresent() && *rBasePath.pBegin == '/');
418 : assert(rRelPath.isPresent());
419 :
420 : // The invariant of aBuffer is that it always starts and ends with a slash
421 : // (until probably right at the end of the algorithm, when the last segment
422 : // of rRelPath is added, which does not necessarily end in a slash):
423 21460 : rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
424 : // XXX numeric overflow
425 :
426 : // Segments "." and ".." within rBasePath are not conisdered special (but
427 : // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
428 : // bit unclear about this point:
429 21460 : sal_Int32 nFixed = 1;
430 21460 : sal_Unicode const * p = rBasePath.pBegin + 1;
431 1562224 : for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
432 1540764 : if (*q == '/')
433 : {
434 170454 : if (
435 340979 : (q - p == 1 && p[0] == '.') ||
436 170506 : (q - p == 2 && p[0] == '.' && p[1] == '.')
437 : )
438 : {
439 54 : nFixed = q + 1 - rBasePath.pBegin;
440 : }
441 170454 : p = q + 1;
442 : }
443 21460 : aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
444 :
445 21460 : p = rRelPath.pBegin;
446 21460 : if (p != rRelPath.pEnd)
447 : for (;;)
448 : {
449 21547 : sal_Unicode const * q = p;
450 : sal_Unicode const * r;
451 : for (;;)
452 : {
453 295767 : if (q == rRelPath.pEnd)
454 : {
455 21459 : r = q;
456 21459 : break;
457 : }
458 274308 : if (*q == '/')
459 : {
460 88 : r = q + 1;
461 88 : break;
462 : }
463 274220 : ++q;
464 : }
465 21547 : if (q - p == 2 && p[0] == '.' && p[1] == '.')
466 : {
467 : // Erroneous excess segments ".." within rRelPath are left
468 : // intact, as the examples in RFC 2396, section C.2, suggest:
469 24 : sal_Int32 i = aBuffer.getLength() - 1;
470 24 : if (i < nFixed)
471 : {
472 6 : aBuffer.append(p, r - p);
473 6 : nFixed += 3;
474 : }
475 : else
476 : {
477 58 : while (i > 0 && aBuffer[i - 1] != '/')
478 22 : --i;
479 18 : aBuffer.setLength(i);
480 24 : }
481 : }
482 21523 : else if (q - p != 1 || *p != '.')
483 21461 : aBuffer.append(p, r - p);
484 21547 : if (q == rRelPath.pEnd)
485 21459 : break;
486 88 : p = q + 1;
487 274308 : }
488 :
489 21460 : return aBuffer.makeStringAndClear();
490 : }
491 :
492 : }
493 :
494 10443 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
495 : SAL_THROW_EXTERN_C()
496 : {
497 : static sal_Bool const aCharClass[][nCharClassSize]
498 : = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
499 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
500 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
501 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
502 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
503 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
504 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
505 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
506 : },
507 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
508 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
509 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
510 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
511 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
512 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
513 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
514 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
515 : },
516 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
517 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
518 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
519 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
520 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
521 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
522 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
523 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
524 : },
525 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
526 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
527 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
528 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
529 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
530 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
531 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
532 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
533 : },
534 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
535 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
536 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
537 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
538 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
539 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
540 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
541 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
542 : },
543 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
544 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
545 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
546 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
547 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
548 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
549 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
550 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
551 : },
552 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
553 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
554 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
555 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
556 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
557 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
558 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
559 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
560 : },
561 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
562 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
563 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
564 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
565 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
566 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
567 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
568 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
569 : }};
570 : assert(
571 : (eCharClass >= 0
572 : && (sal::static_int_cast< std::size_t >(eCharClass)
573 : < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
574 10443 : return aCharClass[eCharClass];
575 : }
576 :
577 341377 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
578 : rtl_UriEncodeMechanism eMechanism,
579 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
580 : SAL_THROW_EXTERN_C()
581 : {
582 : assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
583 :
584 341377 : sal_Unicode const * p = pText->buffer;
585 341377 : sal_Unicode const * pEnd = p + pText->length;
586 341377 : sal_Int32 nCapacity = pText->length;
587 341377 : rtl_uString_new_WithLength(pResult, nCapacity);
588 341377 : while (p < pEnd)
589 : {
590 : EscapeType eType;
591 : sal_uInt32 nUtf32 = readUcs4(
592 : &p, pEnd,
593 : (eMechanism == rtl_UriEncodeKeepEscapes
594 28001275 : || eMechanism == rtl_UriEncodeCheckEscapes
595 56011888 : || eMechanism == rtl_UriEncodeStrictKeepEscapes),
596 56078594 : eCharset, &eType);
597 28039297 : switch (eType)
598 : {
599 : case EscapeNo:
600 28039233 : if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
601 : writeUnicode(pResult, &nCapacity,
602 28019113 : static_cast< sal_Unicode >(nUtf32));
603 20120 : else if (!writeEscapeChar(
604 : pResult, &nCapacity, nUtf32, eCharset,
605 : (eMechanism == rtl_UriEncodeStrict
606 20120 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
607 : {
608 2 : rtl_uString_new(pResult);
609 2 : return;
610 : }
611 28039231 : break;
612 :
613 : case EscapeChar:
614 60 : if (eMechanism == rtl_UriEncodeCheckEscapes
615 60 : && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
616 : writeUnicode(pResult, &nCapacity,
617 0 : static_cast< sal_Unicode >(nUtf32));
618 60 : else if (!writeEscapeChar(
619 : pResult, &nCapacity, nUtf32, eCharset,
620 : (eMechanism == rtl_UriEncodeStrict
621 60 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
622 : {
623 0 : rtl_uString_new(pResult);
624 0 : return;
625 : }
626 60 : break;
627 :
628 : case EscapeOctet:
629 4 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
630 4 : break;
631 : }
632 : }
633 341375 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
634 : }
635 :
636 867517 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
637 : rtl_UriDecodeMechanism eMechanism,
638 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
639 : SAL_THROW_EXTERN_C()
640 : {
641 867517 : switch (eMechanism)
642 : {
643 : case rtl_UriDecodeNone:
644 0 : rtl_uString_assign(pResult, pText);
645 0 : break;
646 :
647 : case rtl_UriDecodeToIuri:
648 5 : eCharset = RTL_TEXTENCODING_UTF8;
649 : default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
650 : {
651 867517 : sal_Unicode const * p = pText->buffer;
652 867517 : sal_Unicode const * pEnd = p + pText->length;
653 867517 : sal_Int32 nCapacity = pText->length;
654 867517 : rtl_uString_new_WithLength(pResult, nCapacity);
655 71551819 : while (p < pEnd)
656 : {
657 : EscapeType eType;
658 69816788 : sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
659 69816788 : switch (eType)
660 : {
661 : case EscapeChar:
662 21366 : if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
663 : {
664 1 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
665 1 : break;
666 : }
667 : case EscapeNo:
668 69816763 : writeUcs4(pResult, &nCapacity, nUtf32);
669 69816763 : break;
670 :
671 : case EscapeOctet:
672 24 : if (eMechanism == rtl_UriDecodeStrict) {
673 3 : rtl_uString_new(pResult);
674 867520 : return;
675 : }
676 21 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
677 21 : break;
678 : }
679 : }
680 867514 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
681 : }
682 867514 : break;
683 : }
684 : }
685 :
686 46483 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
687 : rtl_uString * pRelUriRef,
688 : rtl_uString ** pResult,
689 : rtl_uString ** pException)
690 : SAL_THROW_EXTERN_C()
691 : {
692 : // If pRelUriRef starts with a scheme component it is an absolute URI
693 : // reference, and we are done (i.e., this algorithm does not support
694 : // backwards-compatible relative URIs starting with a scheme component, see
695 : // RFC 2396, section 5.2, step 3):
696 46483 : Components aRelComponents;
697 46483 : parseUriRef(pRelUriRef, &aRelComponents);
698 46483 : if (aRelComponents.aScheme.isPresent())
699 : {
700 25015 : rtl_uString_assign(pResult, pRelUriRef);
701 25015 : return true;
702 : }
703 :
704 : // Parse pBaseUriRef; if the scheme component is not present or not valid,
705 : // or the path component is not empty and starts with anything but a slash,
706 : // an exception is raised:
707 21468 : Components aBaseComponents;
708 21468 : parseUriRef(pBaseUriRef, &aBaseComponents);
709 21468 : if (!aBaseComponents.aScheme.isPresent())
710 : {
711 1 : rtl::OUString aMessage(pBaseUriRef);
712 2 : aMessage += rtl::OUString(
713 1 : " does not start with a scheme component");
714 : rtl_uString_assign(pException,
715 1 : const_cast< rtl::OUString & >(aMessage).pData);
716 1 : return false;
717 : }
718 21467 : if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
719 21467 : && *aBaseComponents.aPath.pBegin != '/')
720 : {
721 1 : rtl::OUString aMessage(pBaseUriRef);
722 2 : aMessage += rtl::OUString(
723 1 : "path component does not start with slash");
724 1 : rtl_uString_assign(pException, aMessage.pData);
725 1 : return false;
726 : }
727 :
728 : // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
729 : // into an absolute one (if the relative URI is a reference to the "current
730 : // document," the "current document" is here taken to be the base URI):
731 21466 : rtl::OUStringBuffer aBuffer;
732 : aBuffer.append(aBaseComponents.aScheme.pBegin,
733 21466 : aBaseComponents.aScheme.getLength());
734 21466 : if (aRelComponents.aAuthority.isPresent())
735 : {
736 : aBuffer.append(aRelComponents.aAuthority.pBegin,
737 1 : aRelComponents.aAuthority.getLength());
738 : aBuffer.append(aRelComponents.aPath.pBegin,
739 1 : aRelComponents.aPath.getLength());
740 1 : if (aRelComponents.aQuery.isPresent())
741 : aBuffer.append(aRelComponents.aQuery.pBegin,
742 0 : aRelComponents.aQuery.getLength());
743 : }
744 : else
745 : {
746 21465 : if (aBaseComponents.aAuthority.isPresent())
747 : aBuffer.append(aBaseComponents.aAuthority.pBegin,
748 21465 : aBaseComponents.aAuthority.getLength());
749 42930 : if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
750 21465 : && !aRelComponents.aQuery.isPresent())
751 : {
752 : aBuffer.append(aBaseComponents.aPath.pBegin,
753 2 : aBaseComponents.aPath.getLength());
754 2 : if (aBaseComponents.aQuery.isPresent())
755 : aBuffer.append(aBaseComponents.aQuery.pBegin,
756 2 : aBaseComponents.aQuery.getLength());
757 : }
758 : else
759 : {
760 21463 : if (*aRelComponents.aPath.pBegin == '/')
761 : aBuffer.append(aRelComponents.aPath.pBegin,
762 3 : aRelComponents.aPath.getLength());
763 : else
764 : aBuffer.append(joinPaths(aBaseComponents.aPath,
765 21460 : aRelComponents.aPath));
766 21463 : if (aRelComponents.aQuery.isPresent())
767 : aBuffer.append(aRelComponents.aQuery.pBegin,
768 6 : aRelComponents.aQuery.getLength());
769 : }
770 : }
771 21466 : if (aRelComponents.aFragment.isPresent())
772 : aBuffer.append(aRelComponents.aFragment.pBegin,
773 6 : aRelComponents.aFragment.getLength());
774 21466 : rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
775 21466 : return true;
776 : }
777 :
778 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|