LCOV - code coverage report
Current view: top level - sal/rtl - uri.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 277 284 97.5 %
Date: 2014-11-03 Functions: 17 17 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "surrogates.hxx"
      21             : 
      22             : #include "osl/diagnose.h"
      23             : #include "rtl/character.hxx"
      24             : #include "rtl/strbuf.hxx"
      25             : #include "rtl/textenc.h"
      26             : #include "rtl/textcvt.h"
      27             : #include "rtl/uri.h"
      28             : #include "rtl/ustrbuf.h"
      29             : #include "rtl/ustrbuf.hxx"
      30             : #include "rtl/ustring.h"
      31             : #include "rtl/ustring.hxx"
      32             : #include "sal/types.h"
      33             : #include "sal/macros.h"
      34             : 
      35             : #include <algorithm>
      36             : #include <cstddef>
      37             : 
      38             : namespace {
      39             : 
      40             : std::size_t const nCharClassSize = 128;
      41             : 
      42             : sal_Unicode const cEscapePrefix = 0x25; // '%'
      43             : 
      44       53668 : inline int getHexWeight(sal_uInt32 nUtf32)
      45             : {
      46       53506 :     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
      47       35308 :                static_cast< int >(nUtf32 - 0x30) :
      48       18198 :            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
      49       18090 :                static_cast< int >(nUtf32 - 0x41 + 10) :
      50         108 :            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
      51         108 :                static_cast< int >(nUtf32 - 0x61 + 10) :
      52      107174 :                -1; // not a hex digit
      53             : }
      54             : 
      55    43129228 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
      56             : {
      57    43129228 :     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
      58             : }
      59             : 
      60   148793805 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
      61             :                          sal_Unicode cChar)
      62             : {
      63   148793805 :     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
      64   148793805 : }
      65             : 
      66             : enum EscapeType
      67             : {
      68             :     EscapeNo,
      69             :     EscapeChar,
      70             :     EscapeOctet
      71             : };
      72             : 
      73             : /* Read any of the following:
      74             : 
      75             :    - sequence of escape sequences representing character from eCharset,
      76             :      translated to single UCS4 character; or
      77             : 
      78             :    - pair of UTF-16 surrogates, translated to single UCS4 character; or
      79             : 
      80             :    _ single UTF-16 character, extended to UCS4 character.
      81             :  */
      82   148740448 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
      83             :                     bool bEncoded, rtl_TextEncoding eCharset,
      84             :                     EscapeType * pType)
      85             : {
      86   148740448 :     sal_uInt32 nChar = *(*pBegin)++;
      87             :     int nWeight1;
      88             :     int nWeight2;
      89   148763158 :     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
      90       22686 :         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
      91   148762972 :         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
      92             :     {
      93       22524 :         *pBegin += 2;
      94       22524 :         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
      95       22524 :         if (nChar <= 0x7F)
      96       14204 :             *pType = EscapeChar;
      97        8320 :         else if (eCharset == RTL_TEXTENCODING_UTF8)
      98             :         {
      99        4204 :             if (nChar >= 0xC0 && nChar <= 0xF4)
     100             :             {
     101             :                 sal_uInt32 nEncoded;
     102             :                 int nShift;
     103             :                 sal_uInt32 nMin;
     104        4170 :                 if (nChar <= 0xDF)
     105             :                 {
     106        4134 :                     nEncoded = (nChar & 0x1F) << 6;
     107        4134 :                     nShift = 0;
     108        4134 :                     nMin = 0x80;
     109             :                 }
     110          36 :                 else if (nChar <= 0xEF)
     111             :                 {
     112          32 :                     nEncoded = (nChar & 0x0F) << 12;
     113          32 :                     nShift = 6;
     114          32 :                     nMin = 0x800;
     115             :                 }
     116             :                 else
     117             :                 {
     118           4 :                     nEncoded = (nChar & 0x07) << 18;
     119           4 :                     nShift = 12;
     120           4 :                     nMin = 0x10000;
     121             :                 }
     122        4170 :                 sal_Unicode const * p = *pBegin;
     123        4170 :                 bool bUTF8 = true;
     124        8376 :                 for (; nShift >= 0; nShift -= 6)
     125             :                 {
     126       12624 :                     if (pEnd - p < 3 || p[0] != cEscapePrefix
     127        4208 :                         || (nWeight1 = getHexWeight(p[1])) < 8
     128        4208 :                         || nWeight1 > 11
     129        8414 :                         || (nWeight2 = getHexWeight(p[2])) < 0)
     130             :                     {
     131           2 :                         bUTF8 = false;
     132           2 :                         break;
     133             :                     }
     134        4206 :                     p += 3;
     135        4206 :                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
     136             :                 }
     137        8338 :                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
     138        8326 :                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
     139             :                 {
     140        4152 :                     *pBegin = p;
     141        4152 :                     *pType = EscapeChar;
     142        4152 :                     return nEncoded;
     143             :                 }
     144             :             }
     145          52 :             *pType = EscapeOctet;
     146             :         }
     147             :         else
     148             :         {
     149        4116 :             rtl::OStringBuffer aBuf;
     150        4116 :             aBuf.append(static_cast< char >(nChar));
     151             :             rtl_TextToUnicodeConverter aConverter
     152        4116 :                 = rtl_createTextToUnicodeConverter(eCharset);
     153        4116 :             sal_Unicode const * p = *pBegin;
     154             :             for (;;)
     155             :             {
     156             :                 sal_Unicode aDst[2];
     157             :                 sal_uInt32 nInfo;
     158             :                 sal_Size nConverted;
     159             :                 sal_Size nDstSize = rtl_convertTextToUnicode(
     160        4144 :                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
     161             :                     SAL_N_ELEMENTS( aDst ),
     162             :                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
     163             :                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
     164             :                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
     165        4144 :                     &nInfo, &nConverted);
     166        4144 :                 if (nInfo == 0)
     167             :                 {
     168             :                     assert( nConverted
     169             :                         == sal::static_int_cast< sal_uInt32 >(
     170             :                             aBuf.getLength()));
     171        4112 :                     rtl_destroyTextToUnicodeConverter(aConverter);
     172        4112 :                     *pBegin = p;
     173        4112 :                     *pType = EscapeChar;
     174             :                     assert( nDstSize == 1
     175             :                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
     176             :                             && isLowSurrogate(aDst[1])));
     177             :                     return nDstSize == 1
     178        4112 :                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
     179             :                 }
     180          64 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     181          28 :                          && pEnd - p >= 3 && p[0] == cEscapePrefix
     182          22 :                          && (nWeight1 = getHexWeight(p[1])) >= 0
     183          54 :                          && (nWeight2 = getHexWeight(p[2])) >= 0)
     184             :                 {
     185          22 :                     p += 3;
     186          22 :                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
     187             :                 }
     188          10 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     189           6 :                          && p != pEnd && *p <= 0x7F)
     190             :                 {
     191           6 :                     aBuf.append(static_cast< char >(*p++));
     192             :                 }
     193             :                 else
     194             :                 {
     195             :                     assert(
     196             :                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
     197             :                         == 0);
     198           4 :                     break;
     199             :                 }
     200          28 :             }
     201           4 :             rtl_destroyTextToUnicodeConverter(aConverter);
     202           4 :             *pType = EscapeOctet;
     203             :         }
     204       14260 :         return nChar;
     205             :     }
     206             :     else
     207             :     {
     208   148717924 :         *pType = EscapeNo;
     209   148717938 :         return isHighSurrogate(nChar) && *pBegin < pEnd
     210          14 :                && isLowSurrogate(**pBegin) ?
     211   148717932 :                    combineSurrogates(nChar, *(*pBegin)++) : nChar;
     212             :     }
     213             : }
     214             : 
     215   105611079 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
     216             : {
     217             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     218   105611079 :     if (nUtf32 <= 0xFFFF) {
     219             :         writeUnicode(
     220   105611073 :             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
     221             :     } else {
     222           6 :         nUtf32 -= 0x10000;
     223             :         writeUnicode(
     224             :             pBuffer, pCapacity,
     225           6 :             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
     226             :         writeUnicode(
     227             :             pBuffer, pCapacity,
     228           6 :             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
     229             :     }
     230   105611077 : }
     231             : 
     232       24602 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     233             :                       sal_uInt32 nOctet)
     234             : {
     235             :     assert(nOctet <= 0xFF); // bad octet
     236             : 
     237             :     static sal_Unicode const aHex[16]
     238             :         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
     239             :             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
     240             : 
     241       24602 :     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
     242       24602 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
     243       24602 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
     244       24602 : }
     245             : 
     246       20396 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     247             :                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
     248             : {
     249             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     250       20396 :     if (eCharset == RTL_TEXTENCODING_UTF8) {
     251        8740 :         if (nUtf32 < 0x80)
     252        4618 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
     253        4122 :         else if (nUtf32 < 0x800)
     254             :         {
     255        4102 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
     256        4102 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     257             :         }
     258          20 :         else if (nUtf32 < 0x10000)
     259             :         {
     260          14 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
     261          14 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     262          14 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     263             :         }
     264             :         else
     265             :         {
     266           6 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
     267           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
     268           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     269           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     270             :         }
     271             :     } else {
     272             :         rtl_UnicodeToTextConverter aConverter
     273       11656 :             = rtl_createUnicodeToTextConverter(eCharset);
     274             :         sal_Unicode aSrc[2];
     275             :         sal_Size nSrcSize;
     276       11656 :         if (nUtf32 <= 0xFFFF)
     277             :         {
     278       11654 :             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
     279       11654 :             nSrcSize = 1;
     280             :         }
     281             :         else
     282             :         {
     283             :             aSrc[0] = static_cast< sal_Unicode >(
     284           2 :                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
     285             :             aSrc[1] = static_cast< sal_Unicode >(
     286           2 :                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
     287           2 :             nSrcSize = 2;
     288             :         }
     289             :         sal_Char aDst[32]; // FIXME  random value
     290             :         sal_uInt32 nInfo;
     291             :         sal_Size nConverted;
     292             :         sal_Size nDstSize = rtl_convertUnicodeToText(
     293             :             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
     294             :             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
     295             :             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
     296             :             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
     297       11656 :             &nInfo, &nConverted);
     298             :         assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
     299       11656 :         rtl_destroyUnicodeToTextConverter(aConverter);
     300       11656 :         if (nInfo == 0) {
     301             :             assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
     302       23312 :             for (sal_Size i = 0; i < nDstSize; ++i)
     303             :                 writeEscapeOctet(pBuffer, pCapacity,
     304       11662 :                                  static_cast< unsigned char >(aDst[i]));
     305             :                     // FIXME  all octets are escaped, even if there is no need
     306             :         } else {
     307           6 :             if (bStrict) {
     308           4 :                 return false;
     309             :             } else {
     310           2 :                 writeUcs4(pBuffer, pCapacity, nUtf32);
     311             :             }
     312             :         }
     313             :     }
     314       20392 :     return true;
     315             : }
     316             : 
     317             : struct Component
     318             : {
     319             :     sal_Unicode const * pBegin;
     320             :     sal_Unicode const * pEnd;
     321             : 
     322      976610 :     inline Component(): pBegin(0), pEnd(0) {}
     323             : 
     324      707130 :     inline bool isPresent() const { return pBegin != 0; }
     325             : 
     326             :     inline sal_Int32 getLength() const;
     327             : };
     328             : 
     329      269484 : inline sal_Int32 Component::getLength() const
     330             : {
     331             :     assert(isPresent()); // taking length of non-present component
     332      269484 :     return static_cast< sal_Int32 >(pEnd - pBegin);
     333             : }
     334             : 
     335      195322 : struct Components
     336             : {
     337             :     Component aScheme;
     338             :     Component aAuthority;
     339             :     Component aPath;
     340             :     Component aQuery;
     341             :     Component aFragment;
     342             : };
     343             : 
     344      195322 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
     345             : {
     346             :     // This algorithm is liberal and accepts various forms of illegal input.
     347             : 
     348      195322 :     sal_Unicode const * pBegin = pUriRef->buffer;
     349      195322 :     sal_Unicode const * pEnd = pBegin + pUriRef->length;
     350      195322 :     sal_Unicode const * pPos = pBegin;
     351             : 
     352      195322 :     if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
     353             :     {
     354     1932129 :         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
     355             :         {
     356     1878919 :             if (*p == ':')
     357             :             {
     358      121184 :                 pComponents->aScheme.pBegin = pBegin;
     359      121184 :                 pComponents->aScheme.pEnd = ++p;
     360      121184 :                 pPos = p;
     361      121184 :                 break;
     362             :             }
     363     3729572 :             else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
     364     1971837 :                      && *p != '.')
     365             :             {
     366       20876 :                 break;
     367             :             }
     368             :         }
     369             :     }
     370             : 
     371      195322 :     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
     372             :     {
     373       74150 :         pComponents->aAuthority.pBegin = pPos;
     374       74150 :         pPos += 2;
     375      148392 :         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
     376          92 :             ++pPos;
     377       74150 :         pComponents->aAuthority.pEnd = pPos;
     378             :     }
     379             : 
     380      195322 :     pComponents->aPath.pBegin = pPos;
     381     6691858 :     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
     382     6301214 :         ++pPos;
     383      195322 :     pComponents->aPath.pEnd = pPos;
     384             : 
     385      195322 :     if (pPos != pEnd && *pPos == '?')
     386             :     {
     387          96 :         pComponents->aQuery.pBegin = pPos++;
     388         306 :         while (pPos != pEnd && * pPos != '#')
     389         114 :             ++pPos;
     390          96 :         pComponents->aQuery.pEnd = pPos;
     391             :     }
     392             : 
     393      195322 :     if (pPos != pEnd)
     394             :     {
     395             :         assert(*pPos == '#');
     396          12 :         pComponents->aFragment.pBegin = pPos;
     397          12 :         pComponents->aFragment.pEnd = pEnd;
     398             :     }
     399      195322 : }
     400             : 
     401      195296 : void appendPath(
     402             :     rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
     403             :     sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
     404             : {
     405     1096318 :     while (precedingSlash || pathBegin != pathEnd) {
     406      705726 :         sal_Unicode const * p = pathBegin;
     407     6259373 :         while (p != pathEnd && *p != '/') {
     408     4847921 :             ++p;
     409             :         }
     410      705726 :         std::size_t n = p - pathBegin;
     411      705726 :         if (n == 1 && pathBegin[0] == '.') {
     412             :             // input begins with "." -> remove from input (and done):
     413             :             //  i.e., !precedingSlash -> !precedingSlash
     414             :             // input begins with "./" -> remove from input:
     415             :             //  i.e., !precedingSlash -> !precedingSlash
     416             :             // input begins with "/." -> replace with "/" in input (and not yet
     417             :             // done):
     418             :             //  i.e., precedingSlash -> precedingSlash
     419             :             // input begins with "/./" -> replace with "/" in input:
     420             :             //  i.e., precedingSlash -> precedingSlash
     421      705702 :         } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
     422             :             // input begins with ".." -> remove from input (and done):
     423             :             //  i.e., !precedingSlash -> !precedingSlash
     424             :             // input begins with "../" -> remove from input
     425             :             //  i.e., !precedingSlash -> !precedingSlash
     426             :             // input begins with "/.." -> replace with "/" in input, and shrink
     427             :             // output (not not yet done):
     428             :             //  i.e., precedingSlash -> precedingSlash
     429             :             // input begins with "/../" -> replace with "/" in input, and shrink
     430             :             // output:
     431             :             //  i.e., precedingSlash -> precedingSlash
     432         108 :             if (precedingSlash) {
     433             :                 buffer.truncate(
     434             :                     bufferStart
     435             :                     + std::max<sal_Int32>(
     436             :                         rtl_ustr_lastIndexOfChar_WithLength(
     437          54 :                             buffer.getStr() + bufferStart,
     438         108 :                             buffer.getLength() - bufferStart, '/'),
     439         108 :                         0));
     440             :             }
     441             :         } else {
     442      705648 :             if (precedingSlash) {
     443      584474 :                 buffer.append('/');
     444             :             }
     445      705648 :             buffer.append(pathBegin, n);
     446      705648 :             precedingSlash = p != pathEnd;
     447             :         }
     448      705726 :         pathBegin = p + (p == pathEnd ? 0 : 1);
     449             :     }
     450      195296 : }
     451             : 
     452             : }
     453             : 
     454       25438 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
     455             :     SAL_THROW_EXTERN_C()
     456             : {
     457             :     static sal_Bool const aCharClass[][nCharClassSize]
     458             :     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
     459             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     460             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
     461             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
     462             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
     463             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
     464             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
     465             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
     466             :        },
     467             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
     468             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     469             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
     470             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     471             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     472             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     473             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     474             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     475             :        },
     476             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
     477             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     478             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     479             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     480             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     481             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     482             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     483             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     484             :        },
     485             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
     486             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     487             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     488             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     489             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     490             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     491             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     492             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     493             :        },
     494             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
     495             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     496             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     497             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     498             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     499             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     500             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     501             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     502             :        },
     503             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
     504             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     505             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     506             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     507             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     508             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     509             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     510             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     511             :        },
     512             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
     513             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     514             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     515             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
     516             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     517             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     518             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     519             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     520             :        },
     521             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
     522             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     523             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
     524             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
     525             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     526             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     527             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     528             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     529             :        }};
     530             :     assert(
     531             :         (eCharClass >= 0
     532             :          && (sal::static_int_cast< std::size_t >(eCharClass)
     533             :              < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
     534       25438 :     return aCharClass[eCharClass];
     535             : }
     536             : 
     537      665430 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass, // Encodes pText into *pResult: code points accepted by isValid(pCharClass, ...) are copied, all others go through writeEscapeChar
     538             :                             rtl_UriEncodeMechanism eMechanism, // decides the flag handed to readUcs4 and the flag handed to writeEscapeChar (see the calls below)
     539             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult) // eCharset is forwarded to readUcs4 and writeEscapeChar; *pResult receives the output
     540             :     SAL_THROW_EXTERN_C()
     541             : {
     542             :     assert(!pCharClass[0x25]); // precondition: the class must reject '%' (0x25), so the percent sign always gets encoded
     543             : 
     544      665430 :     sal_Unicode const * p = pText->buffer; // read cursor over the input
     545      665430 :     sal_Unicode const * pEnd = p + pText->length; // one past the last input unit
     546      665430 :     sal_Int32 nCapacity = pText->length; // initial output capacity; passed by pointer to every write helper
     547      665430 :     rtl_uString_new_WithLength(pResult, nCapacity);
     548      665430 :     while (p < pEnd)
     549             :     {
     550             :         EscapeType eType; // filled in by readUcs4 through &eType
     551             :         sal_uInt32 nUtf32 = readUcs4(
     552             :             &p, pEnd, // the cursor is passed by address; presumably readUcs4 advances it (the loop relies on that)
     553             :             (eMechanism == rtl_UriEncodeKeepEscapes // this flag is true only for the three escape-aware mechanisms;
     554    43072808 :              || eMechanism == rtl_UriEncodeCheckEscapes // presumably it lets readUcs4 recognize existing %XX escapes
     555    86187770 :              || eMechanism == rtl_UriEncodeStrictKeepEscapes), // (TODO confirm against readUcs4)
     556    86258644 :             eCharset, &eType);
     557    43129322 :         switch (eType)
     558             :         {
     559             :         case EscapeNo: // presumably: the code point was not read from an escape sequence
     560    43129222 :             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     561             :                 writeUnicode(pResult, &nCapacity,
     562    43108918 :                              static_cast< sal_Unicode >(nUtf32)); // accepted by the class: copied as is
     563       20304 :             else if (!writeEscapeChar( // not accepted: must be escaped; a false return aborts the whole encoding
     564             :                          pResult, &nCapacity, nUtf32, eCharset,
     565             :                          (eMechanism == rtl_UriEncodeStrict // last argument is true only for the two strict mechanisms
     566       20304 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     567             :             {
     568           4 :                 rtl_uString_new(pResult); // discard the partial output
     569           4 :                 return;
     570             :             }
     571    43129218 :             break;
     572             : 
     573             :         case EscapeChar: // presumably: the code point was decoded from an escape sequence in the input
     574          92 :             if (eMechanism == rtl_UriEncodeCheckEscapes // only CheckEscapes may write such a code point unescaped,
     575          92 :                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     576             :                 writeUnicode(pResult, &nCapacity,
     577           0 :                              static_cast< sal_Unicode >(nUtf32));
     578          92 :             else if (!writeEscapeChar( // every other case writes it through writeEscapeChar again
     579             :                          pResult, &nCapacity, nUtf32, eCharset,
     580             :                          (eMechanism == rtl_UriEncodeStrict // same strict flag as in the EscapeNo case
     581          92 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     582             :             {
     583           0 :                 rtl_uString_new(pResult); // discard the partial output
     584           0 :                 return;
     585             :             }
     586          92 :             break;
     587             : 
     588             :         case EscapeOctet: // the value is handed to writeEscapeOctet unconditionally, whatever the mechanism
     589           8 :             writeEscapeOctet(pResult, &nCapacity, nUtf32);
     590           8 :             break;
     591             :         }
     592             :     }
     593      665426 :     *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity ); // turn the growing buffer into the final string
     594             : }
     595             : 
     596     1661925 : void SAL_CALL rtl_uriDecode(rtl_uString * pText, // Decodes pText into *pResult in the way selected by eMechanism
     597             :                             rtl_UriDecodeMechanism eMechanism, // None: plain assignment; ToIuri: forces UTF-8; any other value: decode using eCharset
     598             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult) // eCharset is forwarded to readUcs4 (overwritten for ToIuri); *pResult receives the output
     599             :     SAL_THROW_EXTERN_C()
     600             : {
     601     1661925 :     switch (eMechanism)
     602             :     {
     603             :     case rtl_UriDecodeNone:
     604           0 :         rtl_uString_assign(pResult, pText); // no decoding at all: *pResult is assigned pText itself
     605           0 :         break;
     606             : 
     607             :     case rtl_UriDecodeToIuri:
     608          10 :         eCharset = RTL_TEXTENCODING_UTF8; // ToIuri ignores the charset supplied by the caller
     609             :         // fall through: the decoding loop below is shared, here with UTF-8 as charset
     610             :     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
     611             :         {
     612     1661925 :             sal_Unicode const * p = pText->buffer; // read cursor over the input
     613     1661925 :             sal_Unicode const * pEnd = p + pText->length; // one past the last input unit
     614     1661925 :             sal_Int32 nCapacity = pText->length; // initial output capacity; passed by pointer to every write helper
     615     1661925 :             rtl_uString_new_WithLength(pResult, nCapacity);
     616   108934970 :             while (p < pEnd)
     617             :             {
     618             :                 EscapeType eType; // filled in by readUcs4 through &eType
     619   105611126 :                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); // third argument is always true here; presumably it enables escape recognition (TODO confirm)
     620   105611128 :                 switch (eType)
     621             :                 {
     622             :                 case EscapeChar:
     623       22376 :                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) // ToIuri does not decode code points up to 0x7F
     624             :                     {
     625           2 :                         writeEscapeOctet(pResult, &nCapacity, nUtf32); // written through writeEscapeOctet instead of as a plain character
     626           2 :                         break;
     627             :                     }
     628             :                 case EscapeNo: // reached directly, and by the EscapeChar case above when its if is not taken (no break there)
     629   105611078 :                     writeUcs4(pResult, &nCapacity, nUtf32);
     630   105611076 :                     break;
     631             : 
     632             :                 case EscapeOctet:
     633          48 :                     if (eMechanism == rtl_UriDecodeStrict) { // Strict gives up on the first EscapeOctet
     634           6 :                         rtl_uString_new(pResult); // discard the partial output
     635     1661931 :                         return;
     636             :                     }
     637          42 :                     writeEscapeOctet(pResult, &nCapacity, nUtf32); // the other mechanisms hand the value to writeEscapeOctet
     638          42 :                     break;
     639             :                 }
     640             :             }
     641     1661919 :             *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity ); // turn the growing buffer into the final string
     642             :         }
     643     1661919 :         break;
     644             :     }
     645             : }
     646             : 
     647      121186 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef, // Resolves pRelUriRef against pBaseUriRef
     648             :                                          rtl_uString * pRelUriRef,
     649             :                                          rtl_uString ** pResult, // assigned the combined reference when true is returned
     650             :                                          rtl_uString ** pException) // assigned an error message when false is returned
     651             :     SAL_THROW_EXTERN_C()
     652             : {
     653             :     // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
     654             :     // relative URI into an absolute one:
     655      121186 :     rtl::OUStringBuffer aBuffer; // the result is assembled here component by component
     656      121186 :     Components aRelComponents;
     657      121186 :     parseUriRef(pRelUriRef, &aRelComponents);
     658      121186 :     if (aRelComponents.aScheme.isPresent()) // the relative reference has its own scheme: the base is not consulted at all
     659             :     {
     660             :         aBuffer.append(aRelComponents.aScheme.pBegin,
     661       47050 :                        aRelComponents.aScheme.getLength());
     662       47050 :         if (aRelComponents.aAuthority.isPresent())
     663             :             aBuffer.append(aRelComponents.aAuthority.pBegin,
     664          16 :                            aRelComponents.aAuthority.getLength());
     665             :         appendPath( // the path goes through appendPath rather than a plain append
     666             :             aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
     667       47050 :             aRelComponents.aPath.pEnd);
     668       47050 :         if (aRelComponents.aQuery.isPresent())
     669             :             aBuffer.append(aRelComponents.aQuery.pBegin,
     670           0 :                            aRelComponents.aQuery.getLength());
     671             :     }
     672             :     else
     673             :     {
     674       74136 :         Components aBaseComponents; // the base is parsed only when it is actually needed
     675       74136 :         parseUriRef(pBaseUriRef, &aBaseComponents);
     676       74136 :         if (!aBaseComponents.aScheme.isPresent()) // neither reference supplies a scheme: this is the only failure path
     677             :         {
     678             :             rtl_uString_assign(
     679             :                 pException,
     680             :                 (rtl::OUString(
     681           4 :                     "<" + rtl::OUString(pBaseUriRef)
     682           4 :                     + "> does not start with a scheme component")
     683           2 :                  .pData));
     684           2 :             return false; // *pResult is not assigned on this path
     685             :         }
     686             :         aBuffer.append(aBaseComponents.aScheme.pBegin, // from here on the scheme always comes from the base
     687       74134 :                        aBaseComponents.aScheme.getLength());
     688       74134 :         if (aRelComponents.aAuthority.isPresent()) // relative authority present: authority, path and query all come from the relative reference
     689             :         {
     690             :             aBuffer.append(aRelComponents.aAuthority.pBegin,
     691           2 :                            aRelComponents.aAuthority.getLength());
     692             :             appendPath(
     693             :                 aBuffer, aBuffer.getLength(), false,
     694           2 :                 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     695           2 :             if (aRelComponents.aQuery.isPresent())
     696             :                 aBuffer.append(aRelComponents.aQuery.pBegin,
     697           0 :                                aRelComponents.aQuery.getLength());
     698             :         }
     699             :         else
     700             :         {
     701       74132 :             if (aBaseComponents.aAuthority.isPresent()) // otherwise the authority, if any, comes from the base
     702             :                 aBuffer.append(aBaseComponents.aAuthority.pBegin,
     703       74130 :                                aBaseComponents.aAuthority.getLength());
     704       74132 :             if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd) // empty relative path: the base path is kept unchanged
     705             :             {
     706             :                 aBuffer.append(aBaseComponents.aPath.pBegin,
     707           6 :                                aBaseComponents.aPath.getLength());
     708           6 :                 if (aRelComponents.aQuery.isPresent()) // the relative query wins over the base query
     709             :                     aBuffer.append(aRelComponents.aQuery.pBegin,
     710           2 :                                    aRelComponents.aQuery.getLength());
     711           4 :                 else if (aBaseComponents.aQuery.isPresent())
     712             :                     aBuffer.append(aBaseComponents.aQuery.pBegin,
     713           4 :                                    aBaseComponents.aQuery.getLength());
     714             :             }
     715             :             else
     716             :             {
     717       74126 :                 if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd // NOTE(review): always true here, the empty path was handled by the enclosing if
     718       74126 :                     && *aRelComponents.aPath.pBegin == '/') // relative path starts with '/': it replaces the base path entirely
     719             :                     appendPath(
     720             :                         aBuffer, aBuffer.getLength(), false,
     721           6 :                         aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     722      148240 :                 else if (aBaseComponents.aAuthority.isPresent() // base has an authority but an empty path:
     723      148238 :                          && aBaseComponents.aPath.pBegin
     724       74118 :                             == aBaseComponents.aPath.pEnd)
     725             :                     appendPath( // third argument is true here; presumably it makes appendPath supply the separating '/' (TODO confirm)
     726             :                         aBuffer, aBuffer.getLength(), true,
     727           2 :                         aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     728             :                 else // general case: relative path is combined with the base path up to its last '/'
     729             :                 {
     730       74118 :                     sal_Int32 n = aBuffer.getLength(); // buffer length before any path is added; passed to both appendPath calls below
     731             :                     sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength( // index of the last '/' in the base path, negative if there is none
     732             :                         aBaseComponents.aPath.pBegin,
     733       74118 :                         aBaseComponents.aPath.getLength(), '/');
     734       74118 :                     if (i >= 0) {
     735             :                         appendPath( // base path up to, but not including, its last '/'
     736             :                             aBuffer, n, false, aBaseComponents.aPath.pBegin,
     737       74118 :                             aBaseComponents.aPath.pBegin + i);
     738             :                     }
     739             :                     appendPath( // then the relative path; the flag is true exactly when a base part was added above
     740             :                         aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
     741       74118 :                         aRelComponents.aPath.pEnd);
     742             :                 }
     743       74126 :                 if (aRelComponents.aQuery.isPresent()) // with a non-empty relative path only the relative query is used
     744             :                     aBuffer.append(aRelComponents.aQuery.pBegin,
     745          10 :                                    aRelComponents.aQuery.getLength());
     746             :             }
     747             :         }
     748             :     }
     749      121184 :     if (aRelComponents.aFragment.isPresent()) // the fragment is taken from the relative reference in every case
     750             :         aBuffer.append(aRelComponents.aFragment.pBegin,
     751          12 :                        aRelComponents.aFragment.getLength());
     752      121184 :     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
     753      121184 :     return true;
     754             : }
     755             : 
     756             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10