Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include "surrogates.hxx"
21 :
22 : #include "osl/diagnose.h"
23 : #include "rtl/character.hxx"
24 : #include "rtl/strbuf.hxx"
25 : #include "rtl/textenc.h"
26 : #include "rtl/textcvt.h"
27 : #include "rtl/uri.h"
28 : #include "rtl/ustrbuf.h"
29 : #include "rtl/ustrbuf.hxx"
30 : #include "rtl/ustring.h"
31 : #include "rtl/ustring.hxx"
32 : #include "sal/types.h"
33 : #include "sal/macros.h"
34 :
35 : #include <algorithm>
36 : #include <cstddef>
37 :
38 : namespace {
39 :
40 : std::size_t const nCharClassSize = 128;
41 :
42 : sal_Unicode const cEscapePrefix = 0x25; // '%'
43 :
44 70941320 : inline bool isHighSurrogate(sal_uInt32 nUtf16)
45 : {
46 70941320 : return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
47 : }
48 :
49 2085 : inline bool isLowSurrogate(sal_uInt32 nUtf16)
50 : {
51 2085 : return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
52 : }
53 :
54 5 : inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
55 : {
56 5 : return SAL_RTL_COMBINE_SURROGATES(high, low);
57 : }
58 :
59 26222 : inline int getHexWeight(sal_uInt32 nUtf32)
60 : {
61 26141 : return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
62 17038 : static_cast< int >(nUtf32 - 0x30) :
63 9103 : nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
64 9049 : static_cast< int >(nUtf32 - 0x41 + 10) :
65 54 : nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
66 54 : static_cast< int >(nUtf32 - 0x61 + 10) :
67 52363 : -1; // not a hex digit
68 : }
69 :
70 19079632 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
71 : {
72 19079632 : return nUtf32 < nCharClassSize && pCharClass[nUtf32];
73 : }
74 :
75 70974907 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
76 : sal_Unicode cChar)
77 : {
78 70974907 : rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
79 70974907 : }
80 :
// How readUcs4 obtained the value it returned.
enum EscapeType
{
    // The character was read literally; no escape sequence was decoded.
    EscapeNo,

    // One or more "%XX" escape sequences were decoded into a character.
    EscapeChar,

    // A "%XX" escape sequence with a value above 0x7F that could not be
    // decoded into a character; the returned value is that single octet.
    EscapeOctet
};
87 :
88 : /* Read any of the following:
89 :
90 : - sequence of escape sequences representing character from eCharset,
91 : translated to single UCS4 character; or
92 :
93 : - pair of UTF-16 surrogates, translated to single UCS4 character; or
94 :
95 : _ single UTF-16 character, extended to UCS4 character.
96 : */
97 70950193 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
98 : bool bEncoded, rtl_TextEncoding eCharset,
99 : EscapeType * pType)
100 : {
101 70950193 : sal_uInt32 nChar = *(*pBegin)++;
102 : int nWeight1;
103 : int nWeight2;
104 70961242 : if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
105 11037 : && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
106 70961149 : && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
107 : {
108 10956 : *pBegin += 2;
109 10956 : nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
110 10956 : if (nChar <= 0x7F)
111 6796 : *pType = EscapeChar;
112 4160 : else if (eCharset == RTL_TEXTENCODING_UTF8)
113 : {
114 2102 : if (nChar >= 0xC0 && nChar <= 0xF4)
115 : {
116 : sal_uInt32 nEncoded;
117 : int nShift;
118 : sal_uInt32 nMin;
119 2085 : if (nChar <= 0xDF)
120 : {
121 2067 : nEncoded = (nChar & 0x1F) << 6;
122 2067 : nShift = 0;
123 2067 : nMin = 0x80;
124 : }
125 18 : else if (nChar <= 0xEF)
126 : {
127 16 : nEncoded = (nChar & 0x0F) << 12;
128 16 : nShift = 6;
129 16 : nMin = 0x800;
130 : }
131 : else
132 : {
133 2 : nEncoded = (nChar & 0x07) << 18;
134 2 : nShift = 12;
135 2 : nMin = 0x10000;
136 : }
137 2085 : sal_Unicode const * p = *pBegin;
138 2085 : bool bUTF8 = true;
139 4188 : for (; nShift >= 0; nShift -= 6)
140 : {
141 6312 : if (pEnd - p < 3 || p[0] != cEscapePrefix
142 2104 : || (nWeight1 = getHexWeight(p[1])) < 8
143 2104 : || nWeight1 > 11
144 4207 : || (nWeight2 = getHexWeight(p[2])) < 0)
145 : {
146 1 : bUTF8 = false;
147 1 : break;
148 : }
149 2103 : p += 3;
150 2103 : nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
151 : }
152 4169 : if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
153 4163 : && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
154 : {
155 2076 : *pBegin = p;
156 2076 : *pType = EscapeChar;
157 2076 : return nEncoded;
158 : }
159 : }
160 26 : *pType = EscapeOctet;
161 : }
162 : else
163 : {
164 2058 : rtl::OStringBuffer aBuf;
165 2058 : aBuf.append(static_cast< char >(nChar));
166 : rtl_TextToUnicodeConverter aConverter
167 2058 : = rtl_createTextToUnicodeConverter(eCharset);
168 2058 : sal_Unicode const * p = *pBegin;
169 : for (;;)
170 : {
171 : sal_Unicode aDst[2];
172 : sal_uInt32 nInfo;
173 : sal_Size nConverted;
174 : sal_Size nDstSize = rtl_convertTextToUnicode(
175 2072 : aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
176 : SAL_N_ELEMENTS( aDst ),
177 : (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
178 : | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
179 : | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
180 2072 : &nInfo, &nConverted);
181 2072 : if (nInfo == 0)
182 : {
183 : assert( nConverted
184 : == sal::static_int_cast< sal_uInt32 >(
185 : aBuf.getLength()));
186 2056 : rtl_destroyTextToUnicodeConverter(aConverter);
187 2056 : *pBegin = p;
188 2056 : *pType = EscapeChar;
189 : assert( nDstSize == 1
190 : || (nDstSize == 2 && isHighSurrogate(aDst[0])
191 : && isLowSurrogate(aDst[1])));
192 : return nDstSize == 1
193 2056 : ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
194 : }
195 32 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
196 14 : && pEnd - p >= 3 && p[0] == cEscapePrefix
197 11 : && (nWeight1 = getHexWeight(p[1])) >= 0
198 27 : && (nWeight2 = getHexWeight(p[2])) >= 0)
199 : {
200 11 : p += 3;
201 11 : aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
202 : }
203 5 : else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
204 3 : && p != pEnd && *p <= 0x7F)
205 : {
206 3 : aBuf.append(static_cast< char >(*p++));
207 : }
208 : else
209 : {
210 : assert(
211 : (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
212 : == 0);
213 2 : break;
214 : }
215 14 : }
216 2 : rtl_destroyTextToUnicodeConverter(aConverter);
217 2 : *pType = EscapeOctet;
218 : }
219 6824 : return nChar;
220 : }
221 : else
222 : {
223 70939237 : *pType = EscapeNo;
224 70939244 : return isHighSurrogate(nChar) && *pBegin < pEnd
225 7 : && isLowSurrogate(**pBegin) ?
226 70939241 : combineSurrogates(nChar, *(*pBegin)++) : nChar;
227 : }
228 : }
229 :
230 51870532 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
231 : {
232 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
233 51870532 : if (nUtf32 <= 0xFFFF) {
234 : writeUnicode(
235 51870529 : pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
236 : } else {
237 3 : nUtf32 -= 0x10000;
238 : writeUnicode(
239 : pBuffer, pCapacity,
240 3 : static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
241 : writeUnicode(
242 : pBuffer, pCapacity,
243 3 : static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
244 : }
245 51870532 : }
246 :
247 11318 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
248 : sal_uInt32 nOctet)
249 : {
250 : assert(nOctet <= 0xFF); // bad octet
251 :
252 : static sal_Unicode const aHex[16]
253 : = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
254 : 0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
255 :
256 11318 : writeUnicode(pBuffer, pCapacity, cEscapePrefix);
257 11318 : writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
258 11318 : writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
259 11318 : }
260 :
261 9215 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
262 : sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
263 : {
264 : assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
265 9215 : if (eCharset == RTL_TEXTENCODING_UTF8) {
266 3387 : if (nUtf32 < 0x80)
267 1326 : writeEscapeOctet(pBuffer, pCapacity, nUtf32);
268 2061 : else if (nUtf32 < 0x800)
269 : {
270 2051 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
271 2051 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
272 : }
273 10 : else if (nUtf32 < 0x10000)
274 : {
275 7 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
276 7 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
277 7 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
278 : }
279 : else
280 : {
281 3 : writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
282 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
283 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
284 3 : writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
285 : }
286 : } else {
287 : rtl_UnicodeToTextConverter aConverter
288 5828 : = rtl_createUnicodeToTextConverter(eCharset);
289 : sal_Unicode aSrc[2];
290 : sal_Size nSrcSize;
291 5828 : if (nUtf32 <= 0xFFFF)
292 : {
293 5827 : aSrc[0] = static_cast< sal_Unicode >(nUtf32);
294 5827 : nSrcSize = 1;
295 : }
296 : else
297 : {
298 : aSrc[0] = static_cast< sal_Unicode >(
299 1 : ((nUtf32 - 0x10000) >> 10) | 0xD800);
300 : aSrc[1] = static_cast< sal_Unicode >(
301 1 : ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
302 1 : nSrcSize = 2;
303 : }
304 : sal_Char aDst[32]; // FIXME random value
305 : sal_uInt32 nInfo;
306 : sal_Size nConverted;
307 : sal_Size nDstSize = rtl_convertUnicodeToText(
308 : aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
309 : RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
310 : | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
311 : | RTL_UNICODETOTEXT_FLAGS_FLUSH,
312 5828 : &nInfo, &nConverted);
313 : assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
314 5828 : rtl_destroyUnicodeToTextConverter(aConverter);
315 5828 : if (nInfo == 0) {
316 : assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
317 11656 : for (sal_Size i = 0; i < nDstSize; ++i)
318 : writeEscapeOctet(pBuffer, pCapacity,
319 5831 : static_cast< unsigned char >(aDst[i]));
320 : // FIXME all octets are escaped, even if there is no need
321 : } else {
322 3 : if (bStrict) {
323 2 : return false;
324 : } else {
325 1 : writeUcs4(pBuffer, pCapacity, nUtf32);
326 : }
327 : }
328 : }
329 9213 : return true;
330 : }
331 :
332 : struct Component
333 : {
334 : sal_Unicode const * pBegin;
335 : sal_Unicode const * pEnd;
336 :
337 456915 : inline Component(): pBegin(0), pEnd(0) {}
338 :
339 331902 : inline bool isPresent() const { return pBegin != 0; }
340 :
341 : inline sal_Int32 getLength() const;
342 : };
343 :
344 125398 : inline sal_Int32 Component::getLength() const
345 : {
346 : assert(isPresent()); // taking length of non-present component
347 125398 : return static_cast< sal_Int32 >(pEnd - pBegin);
348 : }
349 :
350 91383 : struct Components
351 : {
352 : Component aScheme;
353 : Component aAuthority;
354 : Component aPath;
355 : Component aQuery;
356 : Component aFragment;
357 : };
358 :
359 91383 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
360 : {
361 : // This algorithm is liberal and accepts various forms of illegal input.
362 :
363 91383 : sal_Unicode const * pBegin = pUriRef->buffer;
364 91383 : sal_Unicode const * pEnd = pBegin + pUriRef->length;
365 91383 : sal_Unicode const * pPos = pBegin;
366 :
367 91383 : if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
368 : {
369 904310 : for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
370 : {
371 885189 : if (*p == ':')
372 : {
373 57763 : pComponents->aScheme.pBegin = pBegin;
374 57763 : pComponents->aScheme.pEnd = ++p;
375 57763 : pPos = p;
376 57763 : break;
377 : }
378 1759176 : else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
379 931750 : && *p != '.')
380 : {
381 14473 : break;
382 : }
383 : }
384 : }
385 :
386 91383 : if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
387 : {
388 34009 : pComponents->aAuthority.pBegin = pPos;
389 34009 : pPos += 2;
390 68065 : while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
391 47 : ++pPos;
392 34009 : pComponents->aAuthority.pEnd = pPos;
393 : }
394 :
395 91383 : pComponents->aPath.pBegin = pPos;
396 3194156 : while (pPos != pEnd && *pPos != '?' && * pPos != '#')
397 3011390 : ++pPos;
398 91383 : pComponents->aPath.pEnd = pPos;
399 :
400 91383 : if (pPos != pEnd && *pPos == '?')
401 : {
402 48 : pComponents->aQuery.pBegin = pPos++;
403 153 : while (pPos != pEnd && * pPos != '#')
404 57 : ++pPos;
405 48 : pComponents->aQuery.pEnd = pPos;
406 : }
407 :
408 91383 : if (pPos != pEnd)
409 : {
410 : assert(*pPos == '#');
411 6 : pComponents->aFragment.pBegin = pPos;
412 6 : pComponents->aFragment.pEnd = pEnd;
413 : }
414 91383 : }
415 :
416 91370 : void appendPath(
417 : rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
418 : sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
419 : {
420 515927 : while (precedingSlash || pathBegin != pathEnd) {
421 333187 : sal_Unicode const * p = pathBegin;
422 2979766 : while (p != pathEnd && *p != '/') {
423 2313392 : ++p;
424 : }
425 333187 : std::size_t n = p - pathBegin;
426 333187 : if (n == 1 && pathBegin[0] == '.') {
427 : // input begins with "." -> remove from input (and done):
428 : // i.e., !precedingSlash -> !precedingSlash
429 : // input begins with "./" -> remove from input:
430 : // i.e., !precedingSlash -> !precedingSlash
431 : // input begins with "/." -> replace with "/" in input (and not yet
432 : // done):
433 : // i.e., precedingSlash -> precedingSlash
434 : // input begins with "/./" -> replace with "/" in input:
435 : // i.e., precedingSlash -> precedingSlash
436 333175 : } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
437 : // input begins with ".." -> remove from input (and done):
438 : // i.e., !precedingSlash -> !precedingSlash
439 : // input begins with "../" -> remove from input
440 : // i.e., !precedingSlash -> !precedingSlash
441 : // input begins with "/.." -> replace with "/" in input, and shrink
442 : // output (not not yet done):
443 : // i.e., precedingSlash -> precedingSlash
444 : // input begins with "/../" -> replace with "/" in input, and shrink
445 : // output:
446 : // i.e., precedingSlash -> precedingSlash
447 52 : if (precedingSlash) {
448 : buffer.truncate(
449 : bufferStart
450 : + std::max<sal_Int32>(
451 : rtl_ustr_lastIndexOfChar_WithLength(
452 26 : buffer.getStr() + bufferStart,
453 52 : buffer.getLength() - bufferStart, '/'),
454 52 : 0));
455 : }
456 : } else {
457 333149 : if (precedingSlash) {
458 275391 : buffer.append('/');
459 : }
460 333149 : buffer.append(pathBegin, n);
461 333149 : precedingSlash = p != pathEnd;
462 : }
463 333187 : pathBegin = p + (p == pathEnd ? 0 : 1);
464 : }
465 91370 : }
466 :
467 : }
468 :
469 10501 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
470 : SAL_THROW_EXTERN_C()
471 : {
472 : static sal_Bool const aCharClass[][nCharClassSize]
473 : = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
474 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
475 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
476 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
477 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
478 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
479 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
480 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 /*pqrstuvwxyz{|}~ */
481 : },
482 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
483 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
484 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
485 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
486 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
487 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
488 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
489 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
490 : },
491 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
492 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
493 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
494 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
495 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
496 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
497 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
498 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
499 : },
500 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
501 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
502 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
503 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
504 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
505 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
506 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
507 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
508 : },
509 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
510 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
511 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
512 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
513 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
514 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
515 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
516 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
517 : },
518 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
519 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
520 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
521 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
522 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
523 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
524 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
525 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
526 : },
527 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
528 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
529 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
530 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
531 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
532 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
533 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
534 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
535 : },
536 : { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
537 : 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
538 : 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
539 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
540 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
541 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
542 : 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
543 : 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0 /*pqrstuvwxyz{|}~ */
544 : }};
545 : assert(
546 : (eCharClass >= 0
547 : && (sal::static_int_cast< std::size_t >(eCharClass)
548 : < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
549 10501 : return aCharClass[eCharClass];
550 : }
551 :
552 289018 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
553 : rtl_UriEncodeMechanism eMechanism,
554 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
555 : SAL_THROW_EXTERN_C()
556 : {
557 : assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
558 :
559 289018 : sal_Unicode const * p = pText->buffer;
560 289018 : sal_Unicode const * pEnd = p + pText->length;
561 289018 : sal_Int32 nCapacity = pText->length;
562 289018 : rtl_uString_new_WithLength(pResult, nCapacity);
563 289018 : while (p < pEnd)
564 : {
565 : EscapeType eType;
566 : sal_uInt32 nUtf32 = readUcs4(
567 : &p, pEnd,
568 : (eMechanism == rtl_UriEncodeKeepEscapes
569 19068188 : || eMechanism == rtl_UriEncodeCheckEscapes
570 38140645 : || eMechanism == rtl_UriEncodeStrictKeepEscapes),
571 38159274 : eCharset, &eType);
572 19079637 : switch (eType)
573 : {
574 : case EscapeNo:
575 19079629 : if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
576 : writeUnicode(pResult, &nCapacity,
577 19070418 : static_cast< sal_Unicode >(nUtf32));
578 9211 : else if (!writeEscapeChar(
579 : pResult, &nCapacity, nUtf32, eCharset,
580 : (eMechanism == rtl_UriEncodeStrict
581 9211 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
582 : {
583 2 : rtl_uString_new(pResult);
584 2 : return;
585 : }
586 19079627 : break;
587 :
588 : case EscapeChar:
589 4 : if (eMechanism == rtl_UriEncodeCheckEscapes
590 4 : && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
591 : writeUnicode(pResult, &nCapacity,
592 0 : static_cast< sal_Unicode >(nUtf32));
593 4 : else if (!writeEscapeChar(
594 : pResult, &nCapacity, nUtf32, eCharset,
595 : (eMechanism == rtl_UriEncodeStrict
596 4 : || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
597 : {
598 0 : rtl_uString_new(pResult);
599 0 : return;
600 : }
601 4 : break;
602 :
603 : case EscapeOctet:
604 4 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
605 4 : break;
606 : }
607 : }
608 289016 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
609 : }
610 :
611 788701 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
612 : rtl_UriDecodeMechanism eMechanism,
613 : rtl_TextEncoding eCharset, rtl_uString ** pResult)
614 : SAL_THROW_EXTERN_C()
615 : {
616 788701 : switch (eMechanism)
617 : {
618 : case rtl_UriDecodeNone:
619 0 : rtl_uString_assign(pResult, pText);
620 0 : break;
621 :
622 : case rtl_UriDecodeToIuri:
623 5 : eCharset = RTL_TEXTENCODING_UTF8;
624 : //fall-through
625 : default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
626 : {
627 788701 : sal_Unicode const * p = pText->buffer;
628 788701 : sal_Unicode const * pEnd = p + pText->length;
629 788701 : sal_Int32 nCapacity = pText->length;
630 788701 : rtl_uString_new_WithLength(pResult, nCapacity);
631 53447955 : while (p < pEnd)
632 : {
633 : EscapeType eType;
634 51870556 : sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
635 51870556 : switch (eType)
636 : {
637 : case EscapeChar:
638 10924 : if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
639 : {
640 1 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
641 1 : break;
642 : }
643 : case EscapeNo:
644 51869541 : writeUcs4(pResult, &nCapacity, nUtf32);
645 51869541 : break;
646 :
647 : case EscapeOctet:
648 24 : if (eMechanism == rtl_UriDecodeStrict) {
649 3 : rtl_uString_new(pResult);
650 788704 : return;
651 : }
652 21 : writeEscapeOctet(pResult, &nCapacity, nUtf32);
653 21 : break;
654 : }
655 : }
656 788698 : *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
657 : }
658 788698 : break;
659 : }
660 : }
661 :
662 57764 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
663 : rtl_uString * pRelUriRef,
664 : rtl_uString ** pResult,
665 : rtl_uString ** pException)
666 : SAL_THROW_EXTERN_C()
667 : {
668 : // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
669 : // relative URI into an absolute one:
670 57764 : rtl::OUStringBuffer aBuffer;
671 57764 : Components aRelComponents;
672 57764 : parseUriRef(pRelUriRef, &aRelComponents);
673 57764 : if (aRelComponents.aScheme.isPresent())
674 : {
675 : aBuffer.append(aRelComponents.aScheme.pBegin,
676 24145 : aRelComponents.aScheme.getLength());
677 24145 : if (aRelComponents.aAuthority.isPresent())
678 : aBuffer.append(aRelComponents.aAuthority.pBegin,
679 391 : aRelComponents.aAuthority.getLength());
680 : appendPath(
681 : aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
682 24145 : aRelComponents.aPath.pEnd);
683 24145 : if (aRelComponents.aQuery.isPresent())
684 : aBuffer.append(aRelComponents.aQuery.pBegin,
685 0 : aRelComponents.aQuery.getLength());
686 : }
687 : else
688 : {
689 33619 : Components aBaseComponents;
690 33619 : parseUriRef(pBaseUriRef, &aBaseComponents);
691 33619 : if (!aBaseComponents.aScheme.isPresent())
692 : {
693 : rtl_uString_assign(
694 : pException,
695 : (rtl::OUString(
696 2 : "<" + rtl::OUString(pBaseUriRef)
697 2 : + "> does not start with a scheme component")
698 1 : .pData));
699 1 : return false;
700 : }
701 : aBuffer.append(aBaseComponents.aScheme.pBegin,
702 33618 : aBaseComponents.aScheme.getLength());
703 33618 : if (aRelComponents.aAuthority.isPresent())
704 : {
705 : aBuffer.append(aRelComponents.aAuthority.pBegin,
706 1 : aRelComponents.aAuthority.getLength());
707 : appendPath(
708 : aBuffer, aBuffer.getLength(), false,
709 1 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
710 1 : if (aRelComponents.aQuery.isPresent())
711 : aBuffer.append(aRelComponents.aQuery.pBegin,
712 0 : aRelComponents.aQuery.getLength());
713 : }
714 : else
715 : {
716 33617 : if (aBaseComponents.aAuthority.isPresent())
717 : aBuffer.append(aBaseComponents.aAuthority.pBegin,
718 33616 : aBaseComponents.aAuthority.getLength());
719 33617 : if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
720 : {
721 : aBuffer.append(aBaseComponents.aPath.pBegin,
722 3 : aBaseComponents.aPath.getLength());
723 3 : if (aRelComponents.aQuery.isPresent())
724 : aBuffer.append(aRelComponents.aQuery.pBegin,
725 1 : aRelComponents.aQuery.getLength());
726 2 : else if (aBaseComponents.aQuery.isPresent())
727 : aBuffer.append(aBaseComponents.aQuery.pBegin,
728 2 : aBaseComponents.aQuery.getLength());
729 : }
730 : else
731 : {
732 33614 : if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
733 33614 : && *aRelComponents.aPath.pBegin == '/')
734 : appendPath(
735 : aBuffer, aBuffer.getLength(), false,
736 3 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
737 67222 : else if (aBaseComponents.aAuthority.isPresent()
738 67221 : && aBaseComponents.aPath.pBegin
739 33610 : == aBaseComponents.aPath.pEnd)
740 : appendPath(
741 : aBuffer, aBuffer.getLength(), true,
742 1 : aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
743 : else
744 : {
745 33610 : sal_Int32 n = aBuffer.getLength();
746 : sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
747 : aBaseComponents.aPath.pBegin,
748 33610 : aBaseComponents.aPath.getLength(), '/');
749 33610 : if (i >= 0) {
750 : appendPath(
751 : aBuffer, n, false, aBaseComponents.aPath.pBegin,
752 33610 : aBaseComponents.aPath.pBegin + i);
753 : }
754 : appendPath(
755 : aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
756 33610 : aRelComponents.aPath.pEnd);
757 : }
758 33614 : if (aRelComponents.aQuery.isPresent())
759 : aBuffer.append(aRelComponents.aQuery.pBegin,
760 5 : aRelComponents.aQuery.getLength());
761 : }
762 : }
763 : }
764 57763 : if (aRelComponents.aFragment.isPresent())
765 : aBuffer.append(aRelComponents.aFragment.pBegin,
766 6 : aRelComponents.aFragment.getLength());
767 57763 : rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
768 57763 : return true;
769 : }
770 :
771 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|