Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "osl/diagnose.h"
21 : #include "rtl/character.hxx"
22 : #include "rtl/strbuf.hxx"
23 : #include "rtl/textenc.h"
24 : #include "rtl/textcvt.h"
25 : #include "rtl/uri.h"
26 : #include "rtl/ustrbuf.h"
27 : #include "rtl/ustrbuf.hxx"
28 : #include "rtl/ustring.h"
29 : #include "rtl/ustring.hxx"
30 : #include "sal/types.h"
31 : #include "sal/macros.h"
32 :
33 : #include <algorithm>
34 : #include <cstddef>
35 :
36 : namespace {
37 :
38 : std::size_t const nCharClassSize = 128;
39 :
40 : sal_Unicode const cEscapePrefix = 0x25; // '%'
41 :
42 25094 : inline int getHexWeight(sal_uInt32 nUtf32)
43 : {
44 25013 : return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
45 15924 : static_cast< int >(nUtf32 - 0x30) :
46 9089 : nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
47 9035 : static_cast< int >(nUtf32 - 0x41 + 10) :
48 54 : nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
49 54 : static_cast< int >(nUtf32 - 0x61 + 10) :
50 50107 : -1; // not a hex digit
51 : }
52 :
53 32338570 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
54 : {
55 32338570 : return nUtf32 < nCharClassSize && pCharClass[nUtf32];
56 : }
57 :
58 119285446 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
59 : sal_Unicode cChar)
60 : {
61 119285446 : rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
62 119285443 : }
63 :
// Classification of what readUcs4 consumed from its input.
enum EscapeType
{
    EscapeNo,   // an unescaped character (or surrogate pair)
    EscapeChar, // escape sequence(s) that were translated to a character
    EscapeOctet // a single "%XX" escape that is returned as a raw octet
};
70 :
71 : /* Read any of the following:
72 :
73 : - sequence of escape sequences representing character from eCharset,
74 : translated to single UCS4 character; or
75 :
76 : - pair of UTF-16 surrogates, translated to single UCS4 character; or
77 :
78 : _ single UTF-16 character, extended to UCS4 character.
79 : */
80 119258688 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
81 : bool bEncoded, rtl_TextEncoding eCharset,
82 : EscapeType * pType)
83 : {
84 119258688 : sal_uInt32 nChar = *(*pBegin)++;
85 : int nWeight1;
86 : int nWeight2;
87 119269173 : if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
88 10473 : && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
89 119269080 : && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
90 : {
91 10392 : *pBegin += 2;
92 10392 : nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
93 10392 : if (nChar <= 0x7F)
94 6232 : *pType = EscapeChar;
95 4160 : else if (eCharset == RTL_TEXTENCODING_UTF8)
96 : {
97 2102 : if (nChar >= 0xC0 && nChar <= 0xF4)
98 : {
99 : sal_uInt32 nEncoded;
100 : int nShift;
101 : sal_uInt32 nMin;
102 2085 : if (nChar <= 0xDF)
103 : {
104 2067 : nEncoded = (nChar & 0x1F) << 6;
105 2067 : nShift = 0;
106 2067 : nMin = 0x80;
107 : }
108 18 : else if (nChar <= 0xEF)
109 : {
110 16 : nEncoded = (nChar & 0x0F) << 12;
111 16 : nShift = 6;
112 16 : nMin = 0x800;
113 : }
114 : else
115 : {
116 2 : nEncoded = (nChar & 0x07) << 18;
117 2 : nShift = 12;
118 2 : nMin = 0x10000;
119 : }
120 2085 : sal_Unicode const * p = *pBegin;
121 2085 : bool bUTF8 = true;
122 4188 : for (; nShift >= 0; nShift -= 6)
123 : {
124 6312 : if (pEnd - p < 3 || p[0] != cEscapePrefix
125 2104 : || (nWeight1 = getHexWeight(p[1])) < 8
126 2104 : || nWeight1 > 11
127 4207 : || (nWeight2 = getHexWeight(p[2])) < 0)
128 : {
129 1 : bUTF8 = false;
130 1 : break;
131 : }
132 2103 : p += 3;
133 2103 : nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
134 : }
135 4169 : if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
136 2083 : && !rtl::isHighSurrogate(nEncoded)
137 4163 : && !rtl::isLowSurrogate(nEncoded))
138 : {
139 2076 : *pBegin = p;
140 2076 : *pType = EscapeChar;
141 2076 : return nEncoded;
142 : }
143 : }
144 26 : *pType = EscapeOctet;
145 : }
146 : else
147 : {
148 2058 : rtl::OStringBuffer aBuf;
149 2058 : aBuf.append(static_cast< char >(nChar));
150 : rtl_TextToUnicodeConverter aConverter
151 2058 : = rtl_createTextToUnicodeConverter(eCharset);
152 2058 : sal_Unicode const * p = *pBegin;
153 : for (;;)
154 : {
155 : sal_Unicode aDst[2];
156 : sal_uInt32 nInfo;
157 : sal_Size nConverted;
158 : sal_Size nDstSize = rtl_convertTextToUnicode(
159 2072 : aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
160 : SAL_N_ELEMENTS( aDst ),
161 : (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
162 : | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
163 : | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
164 2072 : &nInfo, &nConverted);
165 2072 : if (nInfo == 0)
166 : {
167 : assert( nConverted
168 : == sal::static_int_cast< sal_uInt32 >(
169 : aBuf.getLength()));
170 2056 : rtl_destroyTextToUnicodeConverter(aConverter);
171 2056 : *pBegin = p;
172 2056 : *pType = EscapeChar;
173 : assert( nDstSize == 1
174 : || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
175 : && rtl::isLowSurrogate(aDst[1])));
176 : return nDstSize == 1
177 2056 : ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
178 : }
179 32 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
180 14 : && pEnd - p >= 3 && p[0] == cEscapePrefix
181 11 : && (nWeight1 = getHexWeight(p[1])) >= 0
182 27 : && (nWeight2 = getHexWeight(p[2])) >= 0)
183 : {
184 11 : p += 3;
185 11 : aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
186 : }
187 5 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
188 3 : && p != pEnd && *p <= 0x7F)
189 : {
190 3 : aBuf.append(static_cast< char >(*p++));
191 : }
192 : else
193 : {
194 : assert(
195 : (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
196 : == 0);
197 2 : break;
198 : }
199 14 : }
200 2 : rtl_destroyTextToUnicodeConverter(aConverter);
201 2 : *pType = EscapeOctet;
202 : }
203 6260 : return nChar;
204 : }
205 : else
206 : {
207 119248296 : *pType = EscapeNo;
208 119248303 : return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
209 7 : && rtl::isLowSurrogate(**pBegin) ?
210 119248303 : rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
211 : }
212 : }
213 :
214 86920096 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
215 : {
216 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
217 86920096 : if (nUtf32 <= 0xFFFF) {
218 : writeUnicode(
219 86920093 : pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
220 : } else {
221 3 : nUtf32 -= 0x10000;
222 : writeUnicode(
223 : pBuffer, pCapacity,
224 3 : static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
225 : writeUnicode(
226 : pBuffer, pCapacity,
227 3 : static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
228 : }
229 86920096 : }
230 :
231 12339 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
232 : sal_uInt32 nOctet)
233 : {
234 : assert(nOctet <= 0xFF); // bad octet
235 :
236 : static sal_Unicode const aHex[16]
237 : = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
238 : 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
239 :
240 12339 : writeUnicode(pBuffer, pCapacity, cEscapePrefix);
241 12339 : writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
242 12339 : writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
243 12339 : }
244 :
245 10236 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
246 : sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
247 : {
248 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
249 10236 : if (eCharset == RTL_TEXTENCODING_UTF8) {
250 4408 : if (nUtf32 < 0x80)
251 2347 : writeEscapeOctet(pBuffer, pCapacity, nUtf32);
252 2061 : else if (nUtf32 < 0x800)
253 : {
254 2051 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
255 2051 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
256 : }
257 10 : else if (nUtf32 < 0x10000)
258 : {
259 7 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
260 7 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
261 7 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
262 : }
263 : else
264 : {
265 3 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
266 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
267 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
268 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
269 : }
270 : } else {
271 : rtl_UnicodeToTextConverter aConverter
272 5828 : = rtl_createUnicodeToTextConverter(eCharset);
273 : sal_Unicode aSrc[2];
274 : sal_Size nSrcSize;
275 5828 : if (nUtf32 <= 0xFFFF)
276 : {
277 5827 : aSrc[0] = static_cast< sal_Unicode >(nUtf32);
278 5827 : nSrcSize = 1;
279 : }
280 : else
281 : {
282 : aSrc[0] = static_cast< sal_Unicode >(
283 1 : ((nUtf32 - 0x10000) >> 10) | 0xD800);
284 : aSrc[1] = static_cast< sal_Unicode >(
285 1 : ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
286 1 : nSrcSize = 2;
287 : }
288 : sal_Char aDst[32]; // FIXME random value
289 : sal_uInt32 nInfo;
290 : sal_Size nConverted;
291 : sal_Size nDstSize = rtl_convertUnicodeToText(
292 : aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
293 : RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
294 : | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
295 : | RTL_UNICODETOTEXT_FLAGS_FLUSH,
296 5828 : &nInfo, &nConverted);
297 : assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
298 5828 : rtl_destroyUnicodeToTextConverter(aConverter);
299 5828 : if (nInfo == 0) {
300 : assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
301 11656 : for (sal_Size i = 0; i < nDstSize; ++i)
302 : writeEscapeOctet(pBuffer, pCapacity,
303 5831 : static_cast< unsigned char >(aDst[i]));
304 : // FIXME all octets are escaped, even if there is no need
305 : } else {
306 3 : if (bStrict) {
307 2 : return false;
308 : } else {
309 1 : writeUcs4(pBuffer, pCapacity, nUtf32);
310 : }
311 : }
312 : }
313 10234 : return true;
314 : }
315 :
316 : struct Component
317 : {
318 : sal_Unicode const * pBegin;
319 : sal_Unicode const * pEnd;
320 :
321 628150 : inline Component(): pBegin(0), pEnd(0) {}
322 :
323 456801 : inline bool isPresent() const { return pBegin != 0; }
324 :
325 : inline sal_Int32 getLength() const;
326 : };
327 :
328 171362 : inline sal_Int32 Component::getLength() const
329 : {
330 : assert(isPresent()); // taking length of non-present component
331 171362 : return static_cast< sal_Int32 >(pEnd - pBegin);
332 : }
333 :
334 125630 : struct Components
335 : {
336 : Component aScheme;
337 : Component aAuthority;
338 : Component aPath;
339 : Component aQuery;
340 : Component aFragment;
341 : };
342 :
343 125630 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
344 : {
345 : // This algorithm is liberal and accepts various forms of illegal input.
346 :
347 125630 : sal_Unicode const * pBegin = pUriRef->buffer;
348 125630 : sal_Unicode const * pEnd = pBegin + pUriRef->length;
349 125630 : sal_Unicode const * pPos = pBegin;
350 :
351 125630 : if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
352 : {
353 1282710 : for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
354 : {
355 1251464 : if (*p == ':')
356 : {
357 79921 : pComponents->aScheme.pBegin = pBegin;
358 79921 : pComponents->aScheme.pEnd = ++p;
359 79921 : pPos = p;
360 79921 : break;
361 : }
362 2490554 : else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
363 1319011 : && *p != '.')
364 : {
365 14437 : break;
366 : }
367 : }
368 : }
369 :
370 125630 : if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
371 : {
372 45726 : pComponents->aAuthority.pBegin = pPos;
373 45726 : pPos += 2;
374 91548 : while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
375 96 : ++pPos;
376 45726 : pComponents->aAuthority.pEnd = pPos;
377 : }
378 :
379 125630 : pComponents->aPath.pBegin = pPos;
380 4711316 : while (pPos != pEnd && *pPos != '?' && * pPos != '#')
381 4460056 : ++pPos;
382 125630 : pComponents->aPath.pEnd = pPos;
383 :
384 125630 : if (pPos != pEnd && *pPos == '?')
385 : {
386 48 : pComponents->aQuery.pBegin = pPos++;
387 153 : while (pPos != pEnd && * pPos != '#')
388 57 : ++pPos;
389 48 : pComponents->aQuery.pEnd = pPos;
390 : }
391 :
392 125630 : if (pPos != pEnd)
393 : {
394 : assert(*pPos == '#');
395 6 : pComponents->aFragment.pBegin = pPos;
396 6 : pComponents->aFragment.pEnd = pEnd;
397 : }
398 125630 : }
399 :
400 125617 : void appendPath(
401 : rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
402 : sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
403 : {
404 730162 : while (precedingSlash || pathBegin != pathEnd) {
405 478928 : sal_Unicode const * p = pathBegin;
406 4474019 : while (p != pathEnd && *p != '/') {
407 3516163 : ++p;
408 : }
409 478928 : std::size_t n = p - pathBegin;
410 478928 : if (n == 1 && pathBegin[0] == '.') {
411 : // input begins with "." -> remove from input (and done):
412 : // i.e., !precedingSlash -> !precedingSlash
413 : // input begins with "./" -> remove from input:
414 : // i.e., !precedingSlash -> !precedingSlash
415 : // input begins with "/." -> replace with "/" in input (and not yet
416 : // done):
417 : // i.e., precedingSlash -> precedingSlash
418 : // input begins with "/./" -> replace with "/" in input:
419 : // i.e., precedingSlash -> precedingSlash
420 478916 : } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
421 : // input begins with ".." -> remove from input (and done):
422 : // i.e., !precedingSlash -> !precedingSlash
423 : // input begins with "../" -> remove from input
424 : // i.e., !precedingSlash -> !precedingSlash
425 : // input begins with "/.." -> replace with "/" in input, and shrink
426 : // output (not yet done):
427 : // i.e., precedingSlash -> precedingSlash
428 : // input begins with "/../" -> replace with "/" in input, and shrink
429 : // output:
430 : // i.e., precedingSlash -> precedingSlash
431 54 : if (precedingSlash) {
432 : buffer.truncate(
433 : bufferStart
434 : + std::max<sal_Int32>(
435 : rtl_ustr_lastIndexOfChar_WithLength(
436 27 : buffer.getStr() + bufferStart,
437 54 : buffer.getLength() - bufferStart, '/'),
438 54 : 0));
439 : }
440 : } else {
441 478889 : if (precedingSlash) {
442 398973 : buffer.append('/');
443 : }
444 478889 : buffer.append(pathBegin, n);
445 478889 : precedingSlash = p != pathEnd;
446 : }
447 478928 : pathBegin = p + (p == pathEnd ? 0 : 1);
448 : }
449 125617 : }
450 :
451 : }
452 :
453 18338 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
454 : SAL_THROW_EXTERN_C()
455 : {
456 : static sal_Bool const aCharClass[][nCharClassSize]
457 : = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
458 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
459 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
460 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
461 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
462 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
463 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
464 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
465 : },
466 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
467 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
468 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
469 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
470 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
471 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
472 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
473 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
474 : },
475 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
476 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
477 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
478 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
479 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
480 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
481 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
482 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
483 : },
484 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
485 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
486 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
487 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
488 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
489 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
490 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
491 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
492 : },
493 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
494 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
495 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
496 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
497 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
498 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
499 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
500 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
501 : },
502 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
503 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
504 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
505 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
506 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
507 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
508 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
509 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
510 : },
511 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
512 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
513 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
514 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
515 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
516 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
517 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
518 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
519 : },
520 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
521 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
522 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
523 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
524 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
525 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
526 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
527 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
528 : }};
529 : assert(
530 : (eCharClass >= 0
531 : && (sal::static_int_cast< std::size_t >(eCharClass)
532 : < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
533 18338 : return aCharClass[eCharClass];
534 : }
535 :
536 452528 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
537 : rtl_UriEncodeMechanism eMechanism,
538 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
539 : SAL_THROW_EXTERN_C()
540 : {
541 : assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
542 :
543 452528 : sal_Unicode const * p = pText->buffer;
544 452528 : sal_Unicode const * pEnd = p + pText->length;
545 452528 : sal_Int32 nCapacity = pText->length;
546 452528 : rtl_uString_new_WithLength(pResult, nCapacity);
547 452528 : while (p < pEnd)
548 : {
549 : EscapeType eType;
550 : sal_uInt32 nUtf32 = readUcs4(
551 : &p, pEnd,
552 : (eMechanism == rtl_UriEncodeKeepEscapes
553 32312100 : || eMechanism == rtl_UriEncodeCheckEscapes
554 64643495 : || eMechanism == rtl_UriEncodeStrictKeepEscapes),
555 64677150 : eCharset, &eType);
556 32338575 : switch (eType)
557 : {
558 : case EscapeNo:
559 32338567 : if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
560 : writeUnicode(pResult, &nCapacity,
561 32328335 : static_cast< sal_Unicode >(nUtf32));
562 10232 : else if (!writeEscapeChar(
563 : pResult, &nCapacity, nUtf32, eCharset,
564 : (eMechanism == rtl_UriEncodeStrict
565 10232 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
566 : {
567 2 : rtl_uString_new(pResult);
568 2 : return;
569 : }
570 32338565 : break;
571 :
572 : case EscapeChar:
573 4 : if (eMechanism == rtl_UriEncodeCheckEscapes
574 4 : && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
575 : writeUnicode(pResult, &nCapacity,
576 0 : static_cast< sal_Unicode >(nUtf32));
577 4 : else if (!writeEscapeChar(
578 : pResult, &nCapacity, nUtf32, eCharset,
579 : (eMechanism == rtl_UriEncodeStrict
580 4 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
581 : {
582 0 : rtl_uString_new(pResult);
583 0 : return;
584 : }
585 4 : break;
586 :
587 : case EscapeOctet:
588 4 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
589 4 : break;
590 : }
591 : }
592 452526 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
593 : }
594 :
595 1206209 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
596 : rtl_UriDecodeMechanism eMechanism,
597 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
598 : SAL_THROW_EXTERN_C()
599 : {
600 1206209 : switch (eMechanism)
601 : {
602 : case rtl_UriDecodeNone:
603 0 : rtl_uString_assign(pResult, pText);
604 0 : break;
605 :
606 : case rtl_UriDecodeToIuri:
607 5 : eCharset = RTL_TEXTENCODING_UTF8;
608 : //fall-through
609 : default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
610 : {
611 1206209 : sal_Unicode const * p = pText->buffer;
612 1206209 : sal_Unicode const * pEnd = p + pText->length;
613 1206209 : sal_Int32 nCapacity = pText->length;
614 1206209 : rtl_uString_new_WithLength(pResult, nCapacity);
615 89332535 : while (p < pEnd)
616 : {
617 : EscapeType eType;
618 86920120 : sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
619 86920120 : switch (eType)
620 : {
621 : case EscapeChar:
622 10360 : if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
623 : {
624 1 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
625 1 : break;
626 : }
627 : case EscapeNo:
628 86920095 : writeUcs4(pResult, &nCapacity, nUtf32);
629 86920095 : break;
630 :
631 : case EscapeOctet:
632 24 : if (eMechanism == rtl_UriDecodeStrict) {
633 3 : rtl_uString_new(pResult);
634 1206212 : return;
635 : }
636 21 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
637 21 : break;
638 : }
639 : }
640 1206206 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
641 : }
642 1206206 : break;
643 : }
644 : }
645 :
646 79922 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
647 : rtl_uString * pRelUriRef,
648 : rtl_uString ** pResult,
649 : rtl_uString ** pException)
650 : SAL_THROW_EXTERN_C()
651 : {
652 : // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
653 : // relative URI into an absolute one:
654 79922 : rtl::OUStringBuffer aBuffer;
655 79922 : Components aRelComponents;
656 79922 : parseUriRef(pRelUriRef, &aRelComponents);
657 79922 : if (aRelComponents.aScheme.isPresent())
658 : {
659 : aBuffer.append(aRelComponents.aScheme.pBegin,
660 34214 : aRelComponents.aScheme.getLength());
661 34214 : if (aRelComponents.aAuthority.isPresent())
662 : aBuffer.append(aRelComponents.aAuthority.pBegin,
663 19 : aRelComponents.aAuthority.getLength());
664 : appendPath(
665 : aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
666 34214 : aRelComponents.aPath.pEnd);
667 34214 : if (aRelComponents.aQuery.isPresent())
668 : aBuffer.append(aRelComponents.aQuery.pBegin,
669 0 : aRelComponents.aQuery.getLength());
670 : }
671 : else
672 : {
673 45708 : Components aBaseComponents;
674 45708 : parseUriRef(pBaseUriRef, &aBaseComponents);
675 45708 : if (!aBaseComponents.aScheme.isPresent())
676 : {
677 : rtl_uString_assign(
678 : pException,
679 : (rtl::OUString(
680 2 : "<" + rtl::OUString(pBaseUriRef)
681 2 : + "> does not start with a scheme component")
682 1 : .pData));
683 1 : return false;
684 : }
685 : aBuffer.append(aBaseComponents.aScheme.pBegin,
686 45707 : aBaseComponents.aScheme.getLength());
687 45707 : if (aRelComponents.aAuthority.isPresent())
688 : {
689 : aBuffer.append(aRelComponents.aAuthority.pBegin,
690 1 : aRelComponents.aAuthority.getLength());
691 : appendPath(
692 : aBuffer, aBuffer.getLength(), false,
693 1 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
694 1 : if (aRelComponents.aQuery.isPresent())
695 : aBuffer.append(aRelComponents.aQuery.pBegin,
696 0 : aRelComponents.aQuery.getLength());
697 : }
698 : else
699 : {
700 45706 : if (aBaseComponents.aAuthority.isPresent())
701 : aBuffer.append(aBaseComponents.aAuthority.pBegin,
702 45705 : aBaseComponents.aAuthority.getLength());
703 45706 : if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
704 : {
705 : aBuffer.append(aBaseComponents.aPath.pBegin,
706 3 : aBaseComponents.aPath.getLength());
707 3 : if (aRelComponents.aQuery.isPresent())
708 : aBuffer.append(aRelComponents.aQuery.pBegin,
709 1 : aRelComponents.aQuery.getLength());
710 2 : else if (aBaseComponents.aQuery.isPresent())
711 : aBuffer.append(aBaseComponents.aQuery.pBegin,
712 2 : aBaseComponents.aQuery.getLength());
713 : }
714 : else
715 : {
716 45703 : if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
717 45703 : && *aRelComponents.aPath.pBegin == '/')
718 : appendPath(
719 : aBuffer, aBuffer.getLength(), false,
720 3 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
721 91400 : else if (aBaseComponents.aAuthority.isPresent()
722 91399 : && aBaseComponents.aPath.pBegin
723 45699 : == aBaseComponents.aPath.pEnd)
724 : appendPath(
725 : aBuffer, aBuffer.getLength(), true,
726 1 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
727 : else
728 : {
729 45699 : sal_Int32 n = aBuffer.getLength();
730 : sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
731 : aBaseComponents.aPath.pBegin,
732 45699 : aBaseComponents.aPath.getLength(), '/');
733 45699 : if (i >= 0) {
734 : appendPath(
735 : aBuffer, n, false, aBaseComponents.aPath.pBegin,
736 45699 : aBaseComponents.aPath.pBegin + i);
737 : }
738 : appendPath(
739 : aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
740 45699 : aRelComponents.aPath.pEnd);
741 : }
742 45703 : if (aRelComponents.aQuery.isPresent())
743 : aBuffer.append(aRelComponents.aQuery.pBegin,
744 5 : aRelComponents.aQuery.getLength());
745 : }
746 : }
747 : }
748 79921 : if (aRelComponents.aFragment.isPresent())
749 : aBuffer.append(aRelComponents.aFragment.pBegin,
750 6 : aRelComponents.aFragment.getLength());
751 79921 : rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
752 79921 : return true;
753 : }
754 :
755 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|