Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "surrogates.hxx"
21 :
22 : #include "osl/diagnose.h"
23 : #include "rtl/character.hxx"
24 : #include "rtl/strbuf.hxx"
25 : #include "rtl/textenc.h"
26 : #include "rtl/textcvt.h"
27 : #include "rtl/uri.h"
28 : #include "rtl/ustrbuf.h"
29 : #include "rtl/ustrbuf.hxx"
30 : #include "rtl/ustring.h"
31 : #include "rtl/ustring.hxx"
32 : #include "sal/types.h"
33 : #include "sal/macros.h"
34 :
35 : #include <algorithm>
36 : #include <cstddef>
37 :
38 : namespace {
39 :
40 : std::size_t const nCharClassSize = 128;
41 :
42 : sal_Unicode const cEscapePrefix = 0x25; // '%'
43 :
44 53668 : inline int getHexWeight(sal_uInt32 nUtf32)
45 : {
46 53506 : return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
47 35308 : static_cast< int >(nUtf32 - 0x30) :
48 18198 : nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
49 18090 : static_cast< int >(nUtf32 - 0x41 + 10) :
50 108 : nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
51 108 : static_cast< int >(nUtf32 - 0x61 + 10) :
52 107174 : -1; // not a hex digit
53 : }
54 :
55 43129228 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
56 : {
57 43129228 : return nUtf32 < nCharClassSize && pCharClass[nUtf32];
58 : }
59 :
60 148793805 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
61 : sal_Unicode cChar)
62 : {
63 148793805 : rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
64 148793805 : }
65 :
// Classification that readUcs4() reports for the item it has just read.
enum EscapeType
{
    EscapeNo,    // not an escape sequence: the character was read literally
    EscapeChar,  // escape sequence(s) that decoded to a complete character
    EscapeOctet  // one "%XX" escape that could not be decoded to a character
};
72 :
73 : /* Read any of the following:
74 :
75 : - sequence of escape sequences representing character from eCharset,
76 : translated to single UCS4 character; or
77 :
78 : - pair of UTF-16 surrogates, translated to single UCS4 character; or
79 :
80 : _ single UTF-16 character, extended to UCS4 character.
81 : */
82 148740448 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
83 : bool bEncoded, rtl_TextEncoding eCharset,
84 : EscapeType * pType)
85 : {
86 148740448 : sal_uInt32 nChar = *(*pBegin)++;
87 : int nWeight1;
88 : int nWeight2;
89 148763158 : if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
90 22686 : && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
91 148762972 : && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
92 : {
93 22524 : *pBegin += 2;
94 22524 : nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
95 22524 : if (nChar <= 0x7F)
96 14204 : *pType = EscapeChar;
97 8320 : else if (eCharset == RTL_TEXTENCODING_UTF8)
98 : {
99 4204 : if (nChar >= 0xC0 && nChar <= 0xF4)
100 : {
101 : sal_uInt32 nEncoded;
102 : int nShift;
103 : sal_uInt32 nMin;
104 4170 : if (nChar <= 0xDF)
105 : {
106 4134 : nEncoded = (nChar & 0x1F) << 6;
107 4134 : nShift = 0;
108 4134 : nMin = 0x80;
109 : }
110 36 : else if (nChar <= 0xEF)
111 : {
112 32 : nEncoded = (nChar & 0x0F) << 12;
113 32 : nShift = 6;
114 32 : nMin = 0x800;
115 : }
116 : else
117 : {
118 4 : nEncoded = (nChar & 0x07) << 18;
119 4 : nShift = 12;
120 4 : nMin = 0x10000;
121 : }
122 4170 : sal_Unicode const * p = *pBegin;
123 4170 : bool bUTF8 = true;
124 8376 : for (; nShift >= 0; nShift -= 6)
125 : {
126 12624 : if (pEnd - p < 3 || p[0] != cEscapePrefix
127 4208 : || (nWeight1 = getHexWeight(p[1])) < 8
128 4208 : || nWeight1 > 11
129 8414 : || (nWeight2 = getHexWeight(p[2])) < 0)
130 : {
131 2 : bUTF8 = false;
132 2 : break;
133 : }
134 4206 : p += 3;
135 4206 : nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
136 : }
137 8338 : if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
138 8326 : && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
139 : {
140 4152 : *pBegin = p;
141 4152 : *pType = EscapeChar;
142 4152 : return nEncoded;
143 : }
144 : }
145 52 : *pType = EscapeOctet;
146 : }
147 : else
148 : {
149 4116 : rtl::OStringBuffer aBuf;
150 4116 : aBuf.append(static_cast< char >(nChar));
151 : rtl_TextToUnicodeConverter aConverter
152 4116 : = rtl_createTextToUnicodeConverter(eCharset);
153 4116 : sal_Unicode const * p = *pBegin;
154 : for (;;)
155 : {
156 : sal_Unicode aDst[2];
157 : sal_uInt32 nInfo;
158 : sal_Size nConverted;
159 : sal_Size nDstSize = rtl_convertTextToUnicode(
160 4144 : aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
161 : SAL_N_ELEMENTS( aDst ),
162 : (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
163 : | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
164 : | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
165 4144 : &nInfo, &nConverted);
166 4144 : if (nInfo == 0)
167 : {
168 : assert( nConverted
169 : == sal::static_int_cast< sal_uInt32 >(
170 : aBuf.getLength()));
171 4112 : rtl_destroyTextToUnicodeConverter(aConverter);
172 4112 : *pBegin = p;
173 4112 : *pType = EscapeChar;
174 : assert( nDstSize == 1
175 : || (nDstSize == 2 && isHighSurrogate(aDst[0])
176 : && isLowSurrogate(aDst[1])));
177 : return nDstSize == 1
178 4112 : ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
179 : }
180 64 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
181 28 : && pEnd - p >= 3 && p[0] == cEscapePrefix
182 22 : && (nWeight1 = getHexWeight(p[1])) >= 0
183 54 : && (nWeight2 = getHexWeight(p[2])) >= 0)
184 : {
185 22 : p += 3;
186 22 : aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
187 : }
188 10 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
189 6 : && p != pEnd && *p <= 0x7F)
190 : {
191 6 : aBuf.append(static_cast< char >(*p++));
192 : }
193 : else
194 : {
195 : assert(
196 : (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
197 : == 0);
198 4 : break;
199 : }
200 28 : }
201 4 : rtl_destroyTextToUnicodeConverter(aConverter);
202 4 : *pType = EscapeOctet;
203 : }
204 14260 : return nChar;
205 : }
206 : else
207 : {
208 148717924 : *pType = EscapeNo;
209 148717938 : return isHighSurrogate(nChar) && *pBegin < pEnd
210 14 : && isLowSurrogate(**pBegin) ?
211 148717932 : combineSurrogates(nChar, *(*pBegin)++) : nChar;
212 : }
213 : }
214 :
215 105611079 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
216 : {
217 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
218 105611079 : if (nUtf32 <= 0xFFFF) {
219 : writeUnicode(
220 105611073 : pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
221 : } else {
222 6 : nUtf32 -= 0x10000;
223 : writeUnicode(
224 : pBuffer, pCapacity,
225 6 : static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
226 : writeUnicode(
227 : pBuffer, pCapacity,
228 6 : static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
229 : }
230 105611077 : }
231 :
232 24602 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
233 : sal_uInt32 nOctet)
234 : {
235 : assert(nOctet <= 0xFF); // bad octet
236 :
237 : static sal_Unicode const aHex[16]
238 : = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
239 : 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
240 :
241 24602 : writeUnicode(pBuffer, pCapacity, cEscapePrefix);
242 24602 : writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
243 24602 : writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
244 24602 : }
245 :
246 20396 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
247 : sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
248 : {
249 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
250 20396 : if (eCharset == RTL_TEXTENCODING_UTF8) {
251 8740 : if (nUtf32 < 0x80)
252 4618 : writeEscapeOctet(pBuffer, pCapacity, nUtf32);
253 4122 : else if (nUtf32 < 0x800)
254 : {
255 4102 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
256 4102 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
257 : }
258 20 : else if (nUtf32 < 0x10000)
259 : {
260 14 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
261 14 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
262 14 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
263 : }
264 : else
265 : {
266 6 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
267 6 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
268 6 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
269 6 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
270 : }
271 : } else {
272 : rtl_UnicodeToTextConverter aConverter
273 11656 : = rtl_createUnicodeToTextConverter(eCharset);
274 : sal_Unicode aSrc[2];
275 : sal_Size nSrcSize;
276 11656 : if (nUtf32 <= 0xFFFF)
277 : {
278 11654 : aSrc[0] = static_cast< sal_Unicode >(nUtf32);
279 11654 : nSrcSize = 1;
280 : }
281 : else
282 : {
283 : aSrc[0] = static_cast< sal_Unicode >(
284 2 : ((nUtf32 - 0x10000) >> 10) | 0xD800);
285 : aSrc[1] = static_cast< sal_Unicode >(
286 2 : ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
287 2 : nSrcSize = 2;
288 : }
289 : sal_Char aDst[32]; // FIXME random value
290 : sal_uInt32 nInfo;
291 : sal_Size nConverted;
292 : sal_Size nDstSize = rtl_convertUnicodeToText(
293 : aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
294 : RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
295 : | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
296 : | RTL_UNICODETOTEXT_FLAGS_FLUSH,
297 11656 : &nInfo, &nConverted);
298 : assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
299 11656 : rtl_destroyUnicodeToTextConverter(aConverter);
300 11656 : if (nInfo == 0) {
301 : assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
302 23312 : for (sal_Size i = 0; i < nDstSize; ++i)
303 : writeEscapeOctet(pBuffer, pCapacity,
304 11662 : static_cast< unsigned char >(aDst[i]));
305 : // FIXME all octets are escaped, even if there is no need
306 : } else {
307 6 : if (bStrict) {
308 4 : return false;
309 : } else {
310 2 : writeUcs4(pBuffer, pCapacity, nUtf32);
311 : }
312 : }
313 : }
314 20392 : return true;
315 : }
316 :
317 : struct Component
318 : {
319 : sal_Unicode const * pBegin;
320 : sal_Unicode const * pEnd;
321 :
322 976610 : inline Component(): pBegin(0), pEnd(0) {}
323 :
324 707130 : inline bool isPresent() const { return pBegin != 0; }
325 :
326 : inline sal_Int32 getLength() const;
327 : };
328 :
329 269484 : inline sal_Int32 Component::getLength() const
330 : {
331 : assert(isPresent()); // taking length of non-present component
332 269484 : return static_cast< sal_Int32 >(pEnd - pBegin);
333 : }
334 :
335 195322 : struct Components
336 : {
337 : Component aScheme;
338 : Component aAuthority;
339 : Component aPath;
340 : Component aQuery;
341 : Component aFragment;
342 : };
343 :
344 195322 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
345 : {
346 : // This algorithm is liberal and accepts various forms of illegal input.
347 :
348 195322 : sal_Unicode const * pBegin = pUriRef->buffer;
349 195322 : sal_Unicode const * pEnd = pBegin + pUriRef->length;
350 195322 : sal_Unicode const * pPos = pBegin;
351 :
352 195322 : if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
353 : {
354 1932129 : for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
355 : {
356 1878919 : if (*p == ':')
357 : {
358 121184 : pComponents->aScheme.pBegin = pBegin;
359 121184 : pComponents->aScheme.pEnd = ++p;
360 121184 : pPos = p;
361 121184 : break;
362 : }
363 3729572 : else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
364 1971837 : && *p != '.')
365 : {
366 20876 : break;
367 : }
368 : }
369 : }
370 :
371 195322 : if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
372 : {
373 74150 : pComponents->aAuthority.pBegin = pPos;
374 74150 : pPos += 2;
375 148392 : while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
376 92 : ++pPos;
377 74150 : pComponents->aAuthority.pEnd = pPos;
378 : }
379 :
380 195322 : pComponents->aPath.pBegin = pPos;
381 6691858 : while (pPos != pEnd && *pPos != '?' && * pPos != '#')
382 6301214 : ++pPos;
383 195322 : pComponents->aPath.pEnd = pPos;
384 :
385 195322 : if (pPos != pEnd && *pPos == '?')
386 : {
387 96 : pComponents->aQuery.pBegin = pPos++;
388 306 : while (pPos != pEnd && * pPos != '#')
389 114 : ++pPos;
390 96 : pComponents->aQuery.pEnd = pPos;
391 : }
392 :
393 195322 : if (pPos != pEnd)
394 : {
395 : assert(*pPos == '#');
396 12 : pComponents->aFragment.pBegin = pPos;
397 12 : pComponents->aFragment.pEnd = pEnd;
398 : }
399 195322 : }
400 :
401 195296 : void appendPath(
402 : rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
403 : sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
404 : {
405 1096318 : while (precedingSlash || pathBegin != pathEnd) {
406 705726 : sal_Unicode const * p = pathBegin;
407 6259373 : while (p != pathEnd && *p != '/') {
408 4847921 : ++p;
409 : }
410 705726 : std::size_t n = p - pathBegin;
411 705726 : if (n == 1 && pathBegin[0] == '.') {
412 : // input begins with "." -> remove from input (and done):
413 : // i.e., !precedingSlash -> !precedingSlash
414 : // input begins with "./" -> remove from input:
415 : // i.e., !precedingSlash -> !precedingSlash
416 : // input begins with "/." -> replace with "/" in input (and not yet
417 : // done):
418 : // i.e., precedingSlash -> precedingSlash
419 : // input begins with "/./" -> replace with "/" in input:
420 : // i.e., precedingSlash -> precedingSlash
421 705702 : } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
422 : // input begins with ".." -> remove from input (and done):
423 : // i.e., !precedingSlash -> !precedingSlash
424 : // input begins with "../" -> remove from input
425 : // i.e., !precedingSlash -> !precedingSlash
426 : // input begins with "/.." -> replace with "/" in input, and shrink
427 : // output (not not yet done):
428 : // i.e., precedingSlash -> precedingSlash
429 : // input begins with "/../" -> replace with "/" in input, and shrink
430 : // output:
431 : // i.e., precedingSlash -> precedingSlash
432 108 : if (precedingSlash) {
433 : buffer.truncate(
434 : bufferStart
435 : + std::max<sal_Int32>(
436 : rtl_ustr_lastIndexOfChar_WithLength(
437 54 : buffer.getStr() + bufferStart,
438 108 : buffer.getLength() - bufferStart, '/'),
439 108 : 0));
440 : }
441 : } else {
442 705648 : if (precedingSlash) {
443 584474 : buffer.append('/');
444 : }
445 705648 : buffer.append(pathBegin, n);
446 705648 : precedingSlash = p != pathEnd;
447 : }
448 705726 : pathBegin = p + (p == pathEnd ? 0 : 1);
449 : }
450 195296 : }
451 :
452 : }
453 :
454 25438 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
455 : SAL_THROW_EXTERN_C()
456 : {
457 : static sal_Bool const aCharClass[][nCharClassSize]
458 : = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
459 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
460 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
461 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
462 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
463 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
464 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
465 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
466 : },
467 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
468 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
469 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
470 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
471 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
472 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
473 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
474 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
475 : },
476 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
477 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
478 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
479 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
480 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
481 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
482 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
483 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
484 : },
485 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
486 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
487 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
488 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
489 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
490 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
491 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
492 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
493 : },
494 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
495 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
496 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
497 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
498 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
499 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
500 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
501 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
502 : },
503 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
504 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
505 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
506 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
507 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
508 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
509 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
510 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
511 : },
512 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
513 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
514 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
515 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
516 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
517 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
518 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
519 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
520 : },
521 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
522 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
523 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
524 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
525 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
526 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
527 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
528 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
529 : }};
530 : assert(
531 : (eCharClass >= 0
532 : && (sal::static_int_cast< std::size_t >(eCharClass)
533 : < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
534 25438 : return aCharClass[eCharClass];
535 : }
536 :
537 665430 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
538 : rtl_UriEncodeMechanism eMechanism,
539 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
540 : SAL_THROW_EXTERN_C()
541 : {
542 : assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
543 :
544 665430 : sal_Unicode const * p = pText->buffer;
545 665430 : sal_Unicode const * pEnd = p + pText->length;
546 665430 : sal_Int32 nCapacity = pText->length;
547 665430 : rtl_uString_new_WithLength(pResult, nCapacity);
548 665430 : while (p < pEnd)
549 : {
550 : EscapeType eType;
551 : sal_uInt32 nUtf32 = readUcs4(
552 : &p, pEnd,
553 : (eMechanism == rtl_UriEncodeKeepEscapes
554 43072808 : || eMechanism == rtl_UriEncodeCheckEscapes
555 86187770 : || eMechanism == rtl_UriEncodeStrictKeepEscapes),
556 86258644 : eCharset, &eType);
557 43129322 : switch (eType)
558 : {
559 : case EscapeNo:
560 43129222 : if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
561 : writeUnicode(pResult, &nCapacity,
562 43108918 : static_cast< sal_Unicode >(nUtf32));
563 20304 : else if (!writeEscapeChar(
564 : pResult, &nCapacity, nUtf32, eCharset,
565 : (eMechanism == rtl_UriEncodeStrict
566 20304 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
567 : {
568 4 : rtl_uString_new(pResult);
569 4 : return;
570 : }
571 43129218 : break;
572 :
573 : case EscapeChar:
574 92 : if (eMechanism == rtl_UriEncodeCheckEscapes
575 92 : && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
576 : writeUnicode(pResult, &nCapacity,
577 0 : static_cast< sal_Unicode >(nUtf32));
578 92 : else if (!writeEscapeChar(
579 : pResult, &nCapacity, nUtf32, eCharset,
580 : (eMechanism == rtl_UriEncodeStrict
581 92 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
582 : {
583 0 : rtl_uString_new(pResult);
584 0 : return;
585 : }
586 92 : break;
587 :
588 : case EscapeOctet:
589 8 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
590 8 : break;
591 : }
592 : }
593 665426 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
594 : }
595 :
596 1661925 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
597 : rtl_UriDecodeMechanism eMechanism,
598 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
599 : SAL_THROW_EXTERN_C()
600 : {
601 1661925 : switch (eMechanism)
602 : {
603 : case rtl_UriDecodeNone:
604 0 : rtl_uString_assign(pResult, pText);
605 0 : break;
606 :
607 : case rtl_UriDecodeToIuri:
608 10 : eCharset = RTL_TEXTENCODING_UTF8;
609 : //fall-through
610 : default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
611 : {
612 1661925 : sal_Unicode const * p = pText->buffer;
613 1661925 : sal_Unicode const * pEnd = p + pText->length;
614 1661925 : sal_Int32 nCapacity = pText->length;
615 1661925 : rtl_uString_new_WithLength(pResult, nCapacity);
616 108934970 : while (p < pEnd)
617 : {
618 : EscapeType eType;
619 105611126 : sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
620 105611128 : switch (eType)
621 : {
622 : case EscapeChar:
623 22376 : if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
624 : {
625 2 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
626 2 : break;
627 : }
628 : case EscapeNo:
629 105611078 : writeUcs4(pResult, &nCapacity, nUtf32);
630 105611076 : break;
631 :
632 : case EscapeOctet:
633 48 : if (eMechanism == rtl_UriDecodeStrict) {
634 6 : rtl_uString_new(pResult);
635 1661931 : return;
636 : }
637 42 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
638 42 : break;
639 : }
640 : }
641 1661919 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
642 : }
643 1661919 : break;
644 : }
645 : }
646 :
647 121186 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
648 : rtl_uString * pRelUriRef,
649 : rtl_uString ** pResult,
650 : rtl_uString ** pException)
651 : SAL_THROW_EXTERN_C()
652 : {
653 : // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
654 : // relative URI into an absolute one:
655 121186 : rtl::OUStringBuffer aBuffer;
656 121186 : Components aRelComponents;
657 121186 : parseUriRef(pRelUriRef, &aRelComponents);
658 121186 : if (aRelComponents.aScheme.isPresent())
659 : {
660 : aBuffer.append(aRelComponents.aScheme.pBegin,
661 47050 : aRelComponents.aScheme.getLength());
662 47050 : if (aRelComponents.aAuthority.isPresent())
663 : aBuffer.append(aRelComponents.aAuthority.pBegin,
664 16 : aRelComponents.aAuthority.getLength());
665 : appendPath(
666 : aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
667 47050 : aRelComponents.aPath.pEnd);
668 47050 : if (aRelComponents.aQuery.isPresent())
669 : aBuffer.append(aRelComponents.aQuery.pBegin,
670 0 : aRelComponents.aQuery.getLength());
671 : }
672 : else
673 : {
674 74136 : Components aBaseComponents;
675 74136 : parseUriRef(pBaseUriRef, &aBaseComponents);
676 74136 : if (!aBaseComponents.aScheme.isPresent())
677 : {
678 : rtl_uString_assign(
679 : pException,
680 : (rtl::OUString(
681 4 : "<" + rtl::OUString(pBaseUriRef)
682 4 : + "> does not start with a scheme component")
683 2 : .pData));
684 2 : return false;
685 : }
686 : aBuffer.append(aBaseComponents.aScheme.pBegin,
687 74134 : aBaseComponents.aScheme.getLength());
688 74134 : if (aRelComponents.aAuthority.isPresent())
689 : {
690 : aBuffer.append(aRelComponents.aAuthority.pBegin,
691 2 : aRelComponents.aAuthority.getLength());
692 : appendPath(
693 : aBuffer, aBuffer.getLength(), false,
694 2 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
695 2 : if (aRelComponents.aQuery.isPresent())
696 : aBuffer.append(aRelComponents.aQuery.pBegin,
697 0 : aRelComponents.aQuery.getLength());
698 : }
699 : else
700 : {
701 74132 : if (aBaseComponents.aAuthority.isPresent())
702 : aBuffer.append(aBaseComponents.aAuthority.pBegin,
703 74130 : aBaseComponents.aAuthority.getLength());
704 74132 : if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
705 : {
706 : aBuffer.append(aBaseComponents.aPath.pBegin,
707 6 : aBaseComponents.aPath.getLength());
708 6 : if (aRelComponents.aQuery.isPresent())
709 : aBuffer.append(aRelComponents.aQuery.pBegin,
710 2 : aRelComponents.aQuery.getLength());
711 4 : else if (aBaseComponents.aQuery.isPresent())
712 : aBuffer.append(aBaseComponents.aQuery.pBegin,
713 4 : aBaseComponents.aQuery.getLength());
714 : }
715 : else
716 : {
717 74126 : if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
718 74126 : && *aRelComponents.aPath.pBegin == '/')
719 : appendPath(
720 : aBuffer, aBuffer.getLength(), false,
721 6 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
722 148240 : else if (aBaseComponents.aAuthority.isPresent()
723 148238 : && aBaseComponents.aPath.pBegin
724 74118 : == aBaseComponents.aPath.pEnd)
725 : appendPath(
726 : aBuffer, aBuffer.getLength(), true,
727 2 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
728 : else
729 : {
730 74118 : sal_Int32 n = aBuffer.getLength();
731 : sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
732 : aBaseComponents.aPath.pBegin,
733 74118 : aBaseComponents.aPath.getLength(), '/');
734 74118 : if (i >= 0) {
735 : appendPath(
736 : aBuffer, n, false, aBaseComponents.aPath.pBegin,
737 74118 : aBaseComponents.aPath.pBegin + i);
738 : }
739 : appendPath(
740 : aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
741 74118 : aRelComponents.aPath.pEnd);
742 : }
743 74126 : if (aRelComponents.aQuery.isPresent())
744 : aBuffer.append(aRelComponents.aQuery.pBegin,
745 10 : aRelComponents.aQuery.getLength());
746 : }
747 : }
748 : }
749 121184 : if (aRelComponents.aFragment.isPresent())
750 : aBuffer.append(aRelComponents.aFragment.pBegin,
751 12 : aRelComponents.aFragment.getLength());
752 121184 : rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
753 121184 : return true;
754 : }
755 :
756 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|