LCOV - commit c8344322a7af75b84dd3ca8f78b05543a976dfd5

LCOV - code coverage report

Current view:	top level - sal/rtl - uri.cxx (source / functions)		Hit	Total	Coverage
Test:	commit c8344322a7af75b84dd3ca8f78b05543a976dfd5	Lines:	278	285	97.5 %
Date:	2015-06-13 12:38:46	Functions:	17	17	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "osl/diagnose.h"
      21             : #include "rtl/character.hxx"
      22             : #include "rtl/strbuf.hxx"
      23             : #include "rtl/textenc.h"
      24             : #include "rtl/textcvt.h"
      25             : #include "rtl/uri.h"
      26             : #include "rtl/ustrbuf.h"
      27             : #include "rtl/ustrbuf.hxx"
      28             : #include "rtl/ustring.h"
      29             : #include "rtl/ustring.hxx"
      30             : #include "sal/types.h"
      31             : #include "sal/macros.h"
      32             : 
      33             : #include <algorithm>
      34             : #include <cstddef>
      35             : 
      36             : namespace {
      37             : 
      38             : std::size_t const nCharClassSize = 128;
      39             : 
      40             : sal_Unicode const cEscapePrefix = 0x25; // '%'
      41             : 
      42       25094 : inline int getHexWeight(sal_uInt32 nUtf32)
      43             : {
      44       25013 :     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
      45       15924 :                static_cast< int >(nUtf32 - 0x30) :
      46        9089 :            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
      47        9035 :                static_cast< int >(nUtf32 - 0x41 + 10) :
      48          54 :            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
      49          54 :                static_cast< int >(nUtf32 - 0x61 + 10) :
      50       50107 :                -1; // not a hex digit
      51             : }
      52             : 
      53    32338570 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
      54             : {
      55    32338570 :     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
      56             : }
      57             : 
      58   119285446 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
      59             :                          sal_Unicode cChar)
      60             : {
      61   119285446 :     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
      62   119285443 : }
      63             : 
      64             : enum EscapeType
      65             : {
      66             :     EscapeNo,
      67             :     EscapeChar,
      68             :     EscapeOctet
      69             : };
      70             : 
      71             : /* Read any of the following:
      72             : 
      73             :    - sequence of escape sequences representing character from eCharset,
      74             :      translated to single UCS4 character; or
      75             : 
      76             :    - pair of UTF-16 surrogates, translated to single UCS4 character; or
      77             : 
      78             :    _ single UTF-16 character, extended to UCS4 character.
      79             :  */
      80   119258688 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
      81             :                     bool bEncoded, rtl_TextEncoding eCharset,
      82             :                     EscapeType * pType)
      83             : {
      84   119258688 :     sal_uInt32 nChar = *(*pBegin)++;
      85             :     int nWeight1;
      86             :     int nWeight2;
      87   119269173 :     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
      88       10473 :         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
      89   119269080 :         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
      90             :     {
      91       10392 :         *pBegin += 2;
      92       10392 :         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
      93       10392 :         if (nChar <= 0x7F)
      94        6232 :             *pType = EscapeChar;
      95        4160 :         else if (eCharset == RTL_TEXTENCODING_UTF8)
      96             :         {
      97        2102 :             if (nChar >= 0xC0 && nChar <= 0xF4)
      98             :             {
      99             :                 sal_uInt32 nEncoded;
     100             :                 int nShift;
     101             :                 sal_uInt32 nMin;
     102        2085 :                 if (nChar <= 0xDF)
     103             :                 {
     104        2067 :                     nEncoded = (nChar & 0x1F) << 6;
     105        2067 :                     nShift = 0;
     106        2067 :                     nMin = 0x80;
     107             :                 }
     108          18 :                 else if (nChar <= 0xEF)
     109             :                 {
     110          16 :                     nEncoded = (nChar & 0x0F) << 12;
     111          16 :                     nShift = 6;
     112          16 :                     nMin = 0x800;
     113             :                 }
     114             :                 else
     115             :                 {
     116           2 :                     nEncoded = (nChar & 0x07) << 18;
     117           2 :                     nShift = 12;
     118           2 :                     nMin = 0x10000;
     119             :                 }
     120        2085 :                 sal_Unicode const * p = *pBegin;
     121        2085 :                 bool bUTF8 = true;
     122        4188 :                 for (; nShift >= 0; nShift -= 6)
     123             :                 {
     124        6312 :                     if (pEnd - p < 3 || p[0] != cEscapePrefix
     125        2104 :                         || (nWeight1 = getHexWeight(p[1])) < 8
     126        2104 :                         || nWeight1 > 11
     127        4207 :                         || (nWeight2 = getHexWeight(p[2])) < 0)
     128             :                     {
     129           1 :                         bUTF8 = false;
     130           1 :                         break;
     131             :                     }
     132        2103 :                     p += 3;
     133        2103 :                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
     134             :                 }
     135        4169 :                 if (bUTF8 && nEncoded >= nMin && nEncoded <= 0x10FFFF
     136        2083 :                     && !rtl::isHighSurrogate(nEncoded)
     137        4163 :                     && !rtl::isLowSurrogate(nEncoded))
     138             :                 {
     139        2076 :                     *pBegin = p;
     140        2076 :                     *pType = EscapeChar;
     141        2076 :                     return nEncoded;
     142             :                 }
     143             :             }
     144          26 :             *pType = EscapeOctet;
     145             :         }
     146             :         else
     147             :         {
     148        2058 :             rtl::OStringBuffer aBuf;
     149        2058 :             aBuf.append(static_cast< char >(nChar));
     150             :             rtl_TextToUnicodeConverter aConverter
     151        2058 :                 = rtl_createTextToUnicodeConverter(eCharset);
     152        2058 :             sal_Unicode const * p = *pBegin;
     153             :             for (;;)
     154             :             {
     155             :                 sal_Unicode aDst[2];
     156             :                 sal_uInt32 nInfo;
     157             :                 sal_Size nConverted;
     158             :                 sal_Size nDstSize = rtl_convertTextToUnicode(
     159        2072 :                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
     160             :                     SAL_N_ELEMENTS( aDst ),
     161             :                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
     162             :                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
     163             :                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
     164        2072 :                     &nInfo, &nConverted);
     165        2072 :                 if (nInfo == 0)
     166             :                 {
     167             :                     assert( nConverted
     168             :                         == sal::static_int_cast< sal_uInt32 >(
     169             :                             aBuf.getLength()));
     170        2056 :                     rtl_destroyTextToUnicodeConverter(aConverter);
     171        2056 :                     *pBegin = p;
     172        2056 :                     *pType = EscapeChar;
     173             :                     assert( nDstSize == 1
     174             :                         || (nDstSize == 2 && rtl::isHighSurrogate(aDst[0])
     175             :                             && rtl::isLowSurrogate(aDst[1])));
     176             :                     return nDstSize == 1
     177        2056 :                         ? aDst[0] : rtl::combineSurrogates(aDst[0], aDst[1]);
     178             :                 }
     179          32 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     180          14 :                          && pEnd - p >= 3 && p[0] == cEscapePrefix
     181          11 :                          && (nWeight1 = getHexWeight(p[1])) >= 0
     182          27 :                          && (nWeight2 = getHexWeight(p[2])) >= 0)
     183             :                 {
     184          11 :                     p += 3;
     185          11 :                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
     186             :                 }
     187           5 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     188           3 :                          && p != pEnd && *p <= 0x7F)
     189             :                 {
     190           3 :                     aBuf.append(static_cast< char >(*p++));
     191             :                 }
     192             :                 else
     193             :                 {
     194             :                     assert(
     195             :                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
     196             :                         == 0);
     197           2 :                     break;
     198             :                 }
     199          14 :             }
     200           2 :             rtl_destroyTextToUnicodeConverter(aConverter);
     201           2 :             *pType = EscapeOctet;
     202             :         }
     203        6260 :         return nChar;
     204             :     }
     205             :     else
     206             :     {
     207   119248296 :         *pType = EscapeNo;
     208   119248303 :         return rtl::isHighSurrogate(nChar) && *pBegin < pEnd
     209           7 :                && rtl::isLowSurrogate(**pBegin) ?
     210   119248303 :                    rtl::combineSurrogates(nChar, *(*pBegin)++) : nChar;
     211             :     }
     212             : }
     213             : 
     214    86920096 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
     215             : {
     216             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     217    86920096 :     if (nUtf32 <= 0xFFFF) {
     218             :         writeUnicode(
     219    86920093 :             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
     220             :     } else {
     221           3 :         nUtf32 -= 0x10000;
     222             :         writeUnicode(
     223             :             pBuffer, pCapacity,
     224           3 :             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
     225             :         writeUnicode(
     226             :             pBuffer, pCapacity,
     227           3 :             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
     228             :     }
     229    86920096 : }
     230             : 
     231       12339 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     232             :                       sal_uInt32 nOctet)
     233             : {
     234             :     assert(nOctet <= 0xFF); // bad octet
     235             : 
     236             :     static sal_Unicode const aHex[16]
     237             :         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
     238             :             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
     239             : 
     240       12339 :     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
     241       12339 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
     242       12339 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
     243       12339 : }
     244             : 
     245       10236 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     246             :                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
     247             : {
     248             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     249       10236 :     if (eCharset == RTL_TEXTENCODING_UTF8) {
     250        4408 :         if (nUtf32 < 0x80)
     251        2347 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
     252        2061 :         else if (nUtf32 < 0x800)
     253             :         {
     254        2051 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
     255        2051 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     256             :         }
     257          10 :         else if (nUtf32 < 0x10000)
     258             :         {
     259           7 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
     260           7 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     261           7 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     262             :         }
     263             :         else
     264             :         {
     265           3 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
     266           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
     267           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     268           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     269             :         }
     270             :     } else {
     271             :         rtl_UnicodeToTextConverter aConverter
     272        5828 :             = rtl_createUnicodeToTextConverter(eCharset);
     273             :         sal_Unicode aSrc[2];
     274             :         sal_Size nSrcSize;
     275        5828 :         if (nUtf32 <= 0xFFFF)
     276             :         {
     277        5827 :             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
     278        5827 :             nSrcSize = 1;
     279             :         }
     280             :         else
     281             :         {
     282             :             aSrc[0] = static_cast< sal_Unicode >(
     283           1 :                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
     284             :             aSrc[1] = static_cast< sal_Unicode >(
     285           1 :                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
     286           1 :             nSrcSize = 2;
     287             :         }
     288             :         sal_Char aDst[32]; // FIXME  random value
     289             :         sal_uInt32 nInfo;
     290             :         sal_Size nConverted;
     291             :         sal_Size nDstSize = rtl_convertUnicodeToText(
     292             :             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
     293             :             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
     294             :             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
     295             :             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
     296        5828 :             &nInfo, &nConverted);
     297             :         assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
     298        5828 :         rtl_destroyUnicodeToTextConverter(aConverter);
     299        5828 :         if (nInfo == 0) {
     300             :             assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
     301       11656 :             for (sal_Size i = 0; i < nDstSize; ++i)
     302             :                 writeEscapeOctet(pBuffer, pCapacity,
     303        5831 :                                  static_cast< unsigned char >(aDst[i]));
     304             :                     // FIXME  all octets are escaped, even if there is no need
     305             :         } else {
     306           3 :             if (bStrict) {
     307           2 :                 return false;
     308             :             } else {
     309           1 :                 writeUcs4(pBuffer, pCapacity, nUtf32);
     310             :             }
     311             :         }
     312             :     }
     313       10234 :     return true;
     314             : }
     315             : 
     316             : struct Component
     317             : {
     318             :     sal_Unicode const * pBegin;
     319             :     sal_Unicode const * pEnd;
     320             : 
     321      628150 :     inline Component(): pBegin(0), pEnd(0) {}
     322             : 
     323      456801 :     inline bool isPresent() const { return pBegin != 0; }
     324             : 
     325             :     inline sal_Int32 getLength() const;
     326             : };
     327             : 
     328      171362 : inline sal_Int32 Component::getLength() const
     329             : {
     330             :     assert(isPresent()); // taking length of non-present component
     331      171362 :     return static_cast< sal_Int32 >(pEnd - pBegin);
     332             : }
     333             : 
     334      125630 : struct Components
     335             : {
     336             :     Component aScheme;
     337             :     Component aAuthority;
     338             :     Component aPath;
     339             :     Component aQuery;
     340             :     Component aFragment;
     341             : };
     342             : 
     343      125630 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
     344             : {
     345             :     // This algorithm is liberal and accepts various forms of illegal input.
     346             : 
     347      125630 :     sal_Unicode const * pBegin = pUriRef->buffer;
     348      125630 :     sal_Unicode const * pEnd = pBegin + pUriRef->length;
     349      125630 :     sal_Unicode const * pPos = pBegin;
     350             : 
     351      125630 :     if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
     352             :     {
     353     1282710 :         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
     354             :         {
     355     1251464 :             if (*p == ':')
     356             :             {
     357       79921 :                 pComponents->aScheme.pBegin = pBegin;
     358       79921 :                 pComponents->aScheme.pEnd = ++p;
     359       79921 :                 pPos = p;
     360       79921 :                 break;
     361             :             }
     362     2490554 :             else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
     363     1319011 :                      && *p != '.')
     364             :             {
     365       14437 :                 break;
     366             :             }
     367             :         }
     368             :     }
     369             : 
     370      125630 :     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
     371             :     {
     372       45726 :         pComponents->aAuthority.pBegin = pPos;
     373       45726 :         pPos += 2;
     374       91548 :         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
     375          96 :             ++pPos;
     376       45726 :         pComponents->aAuthority.pEnd = pPos;
     377             :     }
     378             : 
     379      125630 :     pComponents->aPath.pBegin = pPos;
     380     4711316 :     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
     381     4460056 :         ++pPos;
     382      125630 :     pComponents->aPath.pEnd = pPos;
     383             : 
     384      125630 :     if (pPos != pEnd && *pPos == '?')
     385             :     {
     386          48 :         pComponents->aQuery.pBegin = pPos++;
     387         153 :         while (pPos != pEnd && * pPos != '#')
     388          57 :             ++pPos;
     389          48 :         pComponents->aQuery.pEnd = pPos;
     390             :     }
     391             : 
     392      125630 :     if (pPos != pEnd)
     393             :     {
     394             :         assert(*pPos == '#');
     395           6 :         pComponents->aFragment.pBegin = pPos;
     396           6 :         pComponents->aFragment.pEnd = pEnd;
     397             :     }
     398      125630 : }
     399             : 
     400      125617 : void appendPath(
     401             :     rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
     402             :     sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
     403             : {
     404      730162 :     while (precedingSlash || pathBegin != pathEnd) {
     405      478928 :         sal_Unicode const * p = pathBegin;
     406     4474019 :         while (p != pathEnd && *p != '/') {
     407     3516163 :             ++p;
     408             :         }
     409      478928 :         std::size_t n = p - pathBegin;
     410      478928 :         if (n == 1 && pathBegin[0] == '.') {
     411             :             // input begins with "." -> remove from input (and done):
     412             :             //  i.e., !precedingSlash -> !precedingSlash
     413             :             // input begins with "./" -> remove from input:
     414             :             //  i.e., !precedingSlash -> !precedingSlash
     415             :             // input begins with "/." -> replace with "/" in input (and not yet
     416             :             // done):
     417             :             //  i.e., precedingSlash -> precedingSlash
     418             :             // input begins with "/./" -> replace with "/" in input:
     419             :             //  i.e., precedingSlash -> precedingSlash
     420      478916 :         } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
     421             :             // input begins with ".." -> remove from input (and done):
     422             :             //  i.e., !precedingSlash -> !precedingSlash
     423             :             // input begins with "../" -> remove from input
     424             :             //  i.e., !precedingSlash -> !precedingSlash
     425             :             // input begins with "/.." -> replace with "/" in input, and shrink
     426             :             // output (not yet done):
     427             :             //  i.e., precedingSlash -> precedingSlash
     428             :             // input begins with "/../" -> replace with "/" in input, and shrink
     429             :             // output:
     430             :             //  i.e., precedingSlash -> precedingSlash
     431          54 :             if (precedingSlash) {
     432             :                 buffer.truncate(
     433             :                     bufferStart
     434             :                     + std::max<sal_Int32>(
     435             :                         rtl_ustr_lastIndexOfChar_WithLength(
     436          27 :                             buffer.getStr() + bufferStart,
     437          54 :                             buffer.getLength() - bufferStart, '/'),
     438          54 :                         0));
     439             :             }
     440             :         } else {
     441      478889 :             if (precedingSlash) {
     442      398973 :                 buffer.append('/');
     443             :             }
     444      478889 :             buffer.append(pathBegin, n);
     445      478889 :             precedingSlash = p != pathEnd;
     446             :         }
     447      478928 :         pathBegin = p + (p == pathEnd ? 0 : 1);
     448             :     }
     449      125617 : }
     450             : 
     451             : }
     452             : 
     453       18338 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
     454             :     SAL_THROW_EXTERN_C()
     455             : {
     456             :     static sal_Bool const aCharClass[][nCharClassSize]
     457             :     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
     458             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     459             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
     460             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
     461             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
     462             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
     463             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
     464             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
     465             :        },
     466             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
     467             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     468             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
     469             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     470             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     471             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     472             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     473             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     474             :        },
     475             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
     476             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     477             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     478             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     479             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     480             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     481             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     482             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     483             :        },
     484             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
     485             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     486             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     487             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     488             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     489             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     490             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     491             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     492             :        },
     493             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
     494             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     495             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     496             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     497             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     498             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     499             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     500             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     501             :        },
     502             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
     503             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     504             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     505             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     506             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     507             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     508             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     509             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     510             :        },
     511             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
     512             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     513             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     514             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
     515             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     516             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     517             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     518             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     519             :        },
     520             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
     521             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     522             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
     523             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
     524             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     525             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     526             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     527             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     528             :        }};
     529             :     assert(
     530             :         (eCharClass >= 0
     531             :          && (sal::static_int_cast< std::size_t >(eCharClass)
     532             :              < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
     533       18338 :     return aCharClass[eCharClass];
     534             : }
     535             : // Percent-encodes pText into *pResult: code points for which isValid(pCharClass, ...) holds are copied, everything else is written escaped.
     536      452528 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
     537             :                             rtl_UriEncodeMechanism eMechanism, // decides whether existing escapes in pText are looked at and whether escaping is strict
     538             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult) // eCharset is handed to readUcs4 and writeEscapeChar; *pResult receives the output
     539             :     SAL_THROW_EXTERN_C()
     540             : {
     541             :     assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
     542             : // Scan [p, pEnd) once; the result buffer starts at the input length and nCapacity is passed by pointer to every write* helper (presumably so they can grow it).
     543      452528 :     sal_Unicode const * p = pText->buffer;
     544      452528 :     sal_Unicode const * pEnd = p + pText->length;
     545      452528 :     sal_Int32 nCapacity = pText->length;
     546      452528 :     rtl_uString_new_WithLength(pResult, nCapacity);
     547      452528 :     while (p < pEnd)
     548             :     {
     549             :         EscapeType eType;
     550             :         sal_uInt32 nUtf32 = readUcs4(
     551             :             &p, pEnd, // p is passed by address so readUcs4 can advance it
     552             :             (eMechanism == rtl_UriEncodeKeepEscapes
     553    32312100 :              || eMechanism == rtl_UriEncodeCheckEscapes
     554    64643495 :              || eMechanism == rtl_UriEncodeStrictKeepEscapes), // true only for these three mechanisms; presumably enables escape recognition in readUcs4 -- TODO confirm
     555    64677150 :             eCharset, &eType); // eType reports which of the three cases below applies
     556    32338575 :         switch (eType)
     557             :         {
     558             :         case EscapeNo: // NOTE(review): by its name, a code point that was not read from an escape sequence
     559    32338567 :             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     560             :                 writeUnicode(pResult, &nCapacity,
     561    32328335 :                              static_cast< sal_Unicode >(nUtf32));
     562       10232 :             else if (!writeEscapeChar(
     563             :                          pResult, &nCapacity, nUtf32, eCharset,
     564             :                          (eMechanism == rtl_UriEncodeStrict
     565       10232 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes))) // last argument is true for the two strict mechanisms
     566             :             {
     567           2 :                 rtl_uString_new(pResult); // escaping failed: drop the partial output by resetting *pResult, then give up
     568           2 :                 return;
     569             :             }
     570    32338565 :             break;
     571             : // EscapeChar -- NOTE(review): by its name, an escape sequence that readUcs4 resolved to a code point; confirm against readUcs4.
     572             :         case EscapeChar:
     573           4 :             if (eMechanism == rtl_UriEncodeCheckEscapes
     574           4 :                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     575             :                 writeUnicode(pResult, &nCapacity, // only CheckEscapes writes an allowed character unescaped
     576           0 :                              static_cast< sal_Unicode >(nUtf32));
     577           4 :             else if (!writeEscapeChar( // every other combination writes the code point escaped again
     578             :                          pResult, &nCapacity, nUtf32, eCharset,
     579             :                          (eMechanism == rtl_UriEncodeStrict
     580           4 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     581             :             {
     582           0 :                 rtl_uString_new(pResult); // same failure handling as in the EscapeNo case
     583           0 :                 return;
     584             :             }
     585           4 :             break;
     586             : // EscapeOctet is always re-emitted through writeEscapeOctet, independent of eMechanism.
     587             :         case EscapeOctet:
     588           4 :             writeEscapeOctet(pResult, &nCapacity, nUtf32);
     589           4 :             break;
     590             :         }
     591             :     }
     592      452526 :     *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity ); // convert the filled buffer into the final result string
     593             : }
     594             : // Decodes pText into *pResult as selected by eMechanism; rtl_UriDecodeStrict resets *pResult and returns early when an EscapeOctet is read.
     595     1206209 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
     596             :                             rtl_UriDecodeMechanism eMechanism,
     597             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult) // eCharset is replaced by UTF-8 for rtl_UriDecodeToIuri
     598             :     SAL_THROW_EXTERN_C()
     599             : {
     600     1206209 :     switch (eMechanism)
     601             :     {
     602             :     case rtl_UriDecodeNone:
     603           0 :         rtl_uString_assign(pResult, pText); // no decoding at all: the result is the input string
     604           0 :         break;
     605             : // ToIuri overrides the caller's eCharset with UTF-8 and then shares the generic decoding loop below.
     606             :     case rtl_UriDecodeToIuri:
     607           5 :         eCharset = RTL_TEXTENCODING_UTF8;
     608             :         //fall-through
     609             :     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
     610             :         {
     611     1206209 :             sal_Unicode const * p = pText->buffer;
     612     1206209 :             sal_Unicode const * pEnd = p + pText->length;
     613     1206209 :             sal_Int32 nCapacity = pText->length; // initial buffer size; passed by pointer to the write* helpers
     614     1206209 :             rtl_uString_new_WithLength(pResult, nCapacity);
     615    89332535 :             while (p < pEnd)
     616             :             {
     617             :                 EscapeType eType;
     618    86920120 :                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType); // third argument is always true here, unlike in rtl_uriEncode
     619    86920120 :                 switch (eType)
     620             :                 {
     621             :                 case EscapeChar:
     622       10360 :                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri) // ToIuri keeps code points up to 0x7F in escaped form
     623             :                     {
     624           1 :                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
     625           1 :                         break;
     626             :                     } // otherwise deliberately fall through to EscapeNo and write the decoded code point
     627             :                 case EscapeNo:
     628    86920095 :                     writeUcs4(pResult, &nCapacity, nUtf32);
     629    86920095 :                     break;
     630             : // EscapeOctet: Strict abandons the whole conversion, the other mechanisms re-emit the octet via writeEscapeOctet.
     631             :                 case EscapeOctet:
     632          24 :                     if (eMechanism == rtl_UriDecodeStrict) {
     633           3 :                         rtl_uString_new(pResult); // drop the partial output by resetting *pResult
     634     1206212 :                         return;
     635             :                     }
     636          21 :                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
     637          21 :                     break;
     638             :                 }
     639             :             }
     640     1206206 :             *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity ); // convert the filled buffer into the final result string
     641             :         }
     642     1206206 :         break;
     643             :     }
     644             : }
     645             : // Resolves pRelUriRef against pBaseUriRef into *pResult and returns true; returns false with a message in *pException when a base scheme is needed but missing.
     646       79922 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
     647             :                                          rtl_uString * pRelUriRef,
     648             :                                          rtl_uString ** pResult,
     649             :                                          rtl_uString ** pException)
     650             :     SAL_THROW_EXTERN_C()
     651             : {
     652             :     // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
     653             :     // relative URI into an absolute one:
     654       79922 :     rtl::OUStringBuffer aBuffer; // the absolute URI is assembled here component by component
     655       79922 :     Components aRelComponents;
     656       79922 :     parseUriRef(pRelUriRef, &aRelComponents);
     657       79922 :     if (aRelComponents.aScheme.isPresent()) // reference has its own scheme: the base is never parsed on this path
     658             :     {
     659             :         aBuffer.append(aRelComponents.aScheme.pBegin,
     660       34214 :                        aRelComponents.aScheme.getLength());
     661       34214 :         if (aRelComponents.aAuthority.isPresent())
     662             :             aBuffer.append(aRelComponents.aAuthority.pBegin,
     663          19 :                            aRelComponents.aAuthority.getLength());
     664             :         appendPath( // appendPath is defined outside this view; presumably it normalizes the path while appending -- TODO confirm
     665             :             aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
     666       34214 :             aRelComponents.aPath.pEnd);
     667       34214 :         if (aRelComponents.aQuery.isPresent())
     668             :             aBuffer.append(aRelComponents.aQuery.pBegin,
     669           0 :                            aRelComponents.aQuery.getLength());
     670             :     }
     671             :     else // reference has no scheme: the scheme must come from the base
     672             :     {
     673       45708 :         Components aBaseComponents;
     674       45708 :         parseUriRef(pBaseUriRef, &aBaseComponents);
     675       45708 :         if (!aBaseComponents.aScheme.isPresent())
     676             :         {
     677             :             rtl_uString_assign( // report the error through *pException; *pResult is not touched on this path
     678             :                 pException,
     679             :                 (rtl::OUString(
     680           2 :                     "<" + rtl::OUString(pBaseUriRef)
     681           2 :                     + "> does not start with a scheme component")
     682           1 :                  .pData));
     683           1 :             return false;
     684             :         }
     685             :         aBuffer.append(aBaseComponents.aScheme.pBegin,
     686       45707 :                        aBaseComponents.aScheme.getLength());
     687       45707 :         if (aRelComponents.aAuthority.isPresent()) // reference brings its own authority: authority, path and query all come from the reference
     688             :         {
     689             :             aBuffer.append(aRelComponents.aAuthority.pBegin,
     690           1 :                            aRelComponents.aAuthority.getLength());
     691             :             appendPath(
     692             :                 aBuffer, aBuffer.getLength(), false,
     693           1 :                 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     694           1 :             if (aRelComponents.aQuery.isPresent())
     695             :                 aBuffer.append(aRelComponents.aQuery.pBegin,
     696           0 :                                aRelComponents.aQuery.getLength());
     697             :         }
     698             :         else // reference has no authority: use the base authority, if the base has one
     699             :         {
     700       45706 :             if (aBaseComponents.aAuthority.isPresent())
     701             :                 aBuffer.append(aBaseComponents.aAuthority.pBegin,
     702       45705 :                                aBaseComponents.aAuthority.getLength());
     703       45706 :             if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd) // empty reference path: keep the base path verbatim
     704             :             {
     705             :                 aBuffer.append(aBaseComponents.aPath.pBegin,
     706           3 :                                aBaseComponents.aPath.getLength());
     707           3 :                 if (aRelComponents.aQuery.isPresent()) // the reference query wins; the base query is used only as a fallback
     708             :                     aBuffer.append(aRelComponents.aQuery.pBegin,
     709           1 :                                    aRelComponents.aQuery.getLength());
     710           2 :                 else if (aBaseComponents.aQuery.isPresent())
     711             :                     aBuffer.append(aBaseComponents.aQuery.pBegin,
     712           2 :                                    aBaseComponents.aQuery.getLength());
     713             :             }
     714             :             else
     715             :             {
     716       45703 :                 if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd // always true in this else branch; keeps the dereference below visibly guarded
     717       45703 :                     && *aRelComponents.aPath.pBegin == '/') // reference path starts with '/': it replaces the base path entirely
     718             :                     appendPath(
     719             :                         aBuffer, aBuffer.getLength(), false,
     720           3 :                         aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     721       91400 :                 else if (aBaseComponents.aAuthority.isPresent()
     722       91399 :                          && aBaseComponents.aPath.pBegin
     723       45699 :                             == aBaseComponents.aPath.pEnd) // base has an authority but an empty path
     724             :                     appendPath(
     725             :                         aBuffer, aBuffer.getLength(), true, // third argument is true only here and in the merge case below; presumably requests a '/' separator -- TODO confirm
     726           1 :                         aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     727             :                 else // merge: base path up to its last '/', followed by the reference path
     728             :                 {
     729       45699 :                     sal_Int32 n = aBuffer.getLength(); // buffer offset at which the merged path begins; passed to both appendPath calls
     730             :                     sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
     731             :                         aBaseComponents.aPath.pBegin,
     732       45699 :                         aBaseComponents.aPath.getLength(), '/');
     733       45699 :                     if (i >= 0) { // i < 0 means the base path contains no '/', so nothing of it is kept
     734             :                         appendPath(
     735             :                             aBuffer, n, false, aBaseComponents.aPath.pBegin,
     736       45699 :                             aBaseComponents.aPath.pBegin + i); // end pointer excludes the last '/' itself
     737             :                     }
     738             :                     appendPath(
     739             :                         aBuffer, n, i >= 0, aRelComponents.aPath.pBegin, // flag is set exactly when a base directory part was appended above
     740       45699 :                         aRelComponents.aPath.pEnd);
     741             :                 }
     742       45703 :                 if (aRelComponents.aQuery.isPresent()) // with a non-empty reference path the base query is never used
     743             :                     aBuffer.append(aRelComponents.aQuery.pBegin,
     744           5 :                                    aRelComponents.aQuery.getLength());
     745             :             }
     746             :         }
     747             :     }
     748       79921 :     if (aRelComponents.aFragment.isPresent()) // the fragment is taken from the reference only, never from the base
     749             :         aBuffer.append(aRelComponents.aFragment.pBegin,
     750           6 :                        aRelComponents.aFragment.getLength());
     751       79921 :     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
     752       79921 :     return true;
     753             : }
     754             : 
     755             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11