LCOV - code coverage report
Current view: top level - sal/rtl - uri.cxx (source / functions) Hit Total Coverage
Test: commit e02a6cb2c3e2b23b203b422e4e0680877f232636 Lines: 123 290 42.4 %
Date: 2014-04-14 Functions: 14 20 70.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "surrogates.hxx"
      21             : 
      22             : #include "osl/diagnose.h"
      23             : #include "rtl/character.hxx"
      24             : #include "rtl/strbuf.hxx"
      25             : #include "rtl/textenc.h"
      26             : #include "rtl/textcvt.h"
      27             : #include "rtl/uri.h"
      28             : #include "rtl/ustrbuf.h"
      29             : #include "rtl/ustrbuf.hxx"
      30             : #include "rtl/ustring.h"
      31             : #include "rtl/ustring.hxx"
      32             : #include "sal/types.h"
      33             : #include "sal/macros.h"
      34             : 
      35             : #include <algorithm>
      36             : #include <cstddef>
      37             : 
      38             : namespace {
      39             : 
      // Number of entries in each character class table (see
      // rtl_getUriCharClass); isValid treats every code point at or above
      // this bound as not belonging to the class.
      std::size_t const nCharClassSize = 0x80;
      41             : 
      42             : sal_Unicode const cEscapePrefix = 0x25; // '%'
      43             : 
      44   200232329 : inline bool isHighSurrogate(sal_uInt32 nUtf16)
      45             : {
      46   200232329 :     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
      47             : }
      48             : 
      49           0 : inline bool isLowSurrogate(sal_uInt32 nUtf16)
      50             : {
      51           0 :     return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
      52             : }
      53             : 
      54           0 : inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
      55             : {
      56           0 :     return SAL_RTL_COMBINE_SURROGATES(high, low);
      57             : }
      58             : 
      59           0 : inline int getHexWeight(sal_uInt32 nUtf32)
      60             : {
      61           0 :     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
      62           0 :                static_cast< int >(nUtf32 - 0x30) :
      63           0 :            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
      64           0 :                static_cast< int >(nUtf32 - 0x41 + 10) :
      65           0 :            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
      66           0 :                static_cast< int >(nUtf32 - 0x61 + 10) :
      67           0 :                -1; // not a hex digit
      68             : }
      69             : 
      70    69336703 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
      71             : {
      72    69336703 :     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
      73             : }
      74             : 
      75   200232329 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
      76             :                          sal_Unicode cChar)
      77             : {
      78   200232329 :     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
      79   200232329 : }
      80             : 
      // How readUcs4 obtained the value it returned.
      enum EscapeType
      {
          // Read literally (a single code unit or a surrogate pair).
          EscapeNo = 0,
          // Decoded from one or more escape sequences into a character.
          EscapeChar = 1,
          // A single escape sequence returned as a raw octet value because
          // it could not be decoded into a character.
          EscapeOctet = 2
      };
      87             : 
      88             : /* Read any of the following:
      89             : 
      90             :    - sequence of escape sequences representing character from eCharset,
      91             :      translated to single UCS4 character; or
      92             : 
      93             :    - pair of UTF-16 surrogates, translated to single UCS4 character; or
      94             : 
      95             :    _ single UTF-16 character, extended to UCS4 character.
      96             :  */
      97   200232329 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
      98             :                     bool bEncoded, rtl_TextEncoding eCharset,
      99             :                     EscapeType * pType)
     100             : {
     101   200232329 :     sal_uInt32 nChar = *(*pBegin)++;
     102             :     int nWeight1;
     103             :     int nWeight2;
     104   200232329 :     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
     105           0 :         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
     106   200232329 :         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
     107             :     {
     108           0 :         *pBegin += 2;
     109           0 :         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
     110           0 :         if (nChar <= 0x7F)
     111           0 :             *pType = EscapeChar;
     112           0 :         else if (eCharset == RTL_TEXTENCODING_UTF8)
     113             :         {
     114           0 :             if (nChar >= 0xC0 && nChar <= 0xF4)
     115             :             {
     116             :                 sal_uInt32 nEncoded;
     117             :                 int nShift;
     118             :                 sal_uInt32 nMin;
     119           0 :                 if (nChar <= 0xDF)
     120             :                 {
     121           0 :                     nEncoded = (nChar & 0x1F) << 6;
     122           0 :                     nShift = 0;
     123           0 :                     nMin = 0x80;
     124             :                 }
     125           0 :                 else if (nChar <= 0xEF)
     126             :                 {
     127           0 :                     nEncoded = (nChar & 0x0F) << 12;
     128           0 :                     nShift = 6;
     129           0 :                     nMin = 0x800;
     130             :                 }
     131             :                 else
     132             :                 {
     133           0 :                     nEncoded = (nChar & 0x07) << 18;
     134           0 :                     nShift = 12;
     135           0 :                     nMin = 0x10000;
     136             :                 }
     137           0 :                 sal_Unicode const * p = *pBegin;
     138           0 :                 bool bUTF8 = true;
     139           0 :                 for (; nShift >= 0; nShift -= 6)
     140             :                 {
     141           0 :                     if (pEnd - p < 3 || p[0] != cEscapePrefix
     142           0 :                         || (nWeight1 = getHexWeight(p[1])) < 8
     143           0 :                         || nWeight1 > 11
     144           0 :                         || (nWeight2 = getHexWeight(p[2])) < 0)
     145             :                     {
     146           0 :                         bUTF8 = false;
     147           0 :                         break;
     148             :                     }
     149           0 :                     p += 3;
     150           0 :                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
     151             :                 }
     152           0 :                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
     153           0 :                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
     154             :                 {
     155           0 :                     *pBegin = p;
     156           0 :                     *pType = EscapeChar;
     157           0 :                     return nEncoded;
     158             :                 }
     159             :             }
     160           0 :             *pType = EscapeOctet;
     161             :         }
     162             :         else
     163             :         {
     164           0 :             rtl::OStringBuffer aBuf;
     165           0 :             aBuf.append(static_cast< char >(nChar));
     166             :             rtl_TextToUnicodeConverter aConverter
     167           0 :                 = rtl_createTextToUnicodeConverter(eCharset);
     168           0 :             sal_Unicode const * p = *pBegin;
     169             :             for (;;)
     170             :             {
     171             :                 sal_Unicode aDst[2];
     172             :                 sal_uInt32 nInfo;
     173             :                 sal_Size nConverted;
     174             :                 sal_Size nDstSize = rtl_convertTextToUnicode(
     175           0 :                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
     176             :                     SAL_N_ELEMENTS( aDst ),
     177             :                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
     178             :                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
     179             :                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
     180           0 :                     &nInfo, &nConverted);
     181           0 :                 if (nInfo == 0)
     182             :                 {
     183             :                     assert( nConverted
     184             :                         == sal::static_int_cast< sal_uInt32 >(
     185             :                             aBuf.getLength()));
     186           0 :                     rtl_destroyTextToUnicodeConverter(aConverter);
     187           0 :                     *pBegin = p;
     188           0 :                     *pType = EscapeChar;
     189             :                     assert( nDstSize == 1
     190             :                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
     191             :                             && isLowSurrogate(aDst[1])));
     192             :                     return nDstSize == 1
     193           0 :                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
     194             :                 }
     195           0 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     196           0 :                          && pEnd - p >= 3 && p[0] == cEscapePrefix
     197           0 :                          && (nWeight1 = getHexWeight(p[1])) >= 0
     198           0 :                          && (nWeight2 = getHexWeight(p[2])) >= 0)
     199             :                 {
     200           0 :                     p += 3;
     201           0 :                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
     202             :                 }
     203           0 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     204           0 :                          && p != pEnd && *p <= 0x7F)
     205             :                 {
     206           0 :                     aBuf.append(static_cast< char >(*p++));
     207             :                 }
     208             :                 else
     209             :                 {
     210             :                     assert(
     211             :                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
     212             :                         == 0);
     213           0 :                     break;
     214             :                 }
     215           0 :             }
     216           0 :             rtl_destroyTextToUnicodeConverter(aConverter);
     217           0 :             *pType = EscapeOctet;
     218             :         }
     219           0 :         return nChar;
     220             :     }
     221             :     else
     222             :     {
     223   200232329 :         *pType = EscapeNo;
     224   200232329 :         return isHighSurrogate(nChar) && *pBegin < pEnd
     225           0 :                && isLowSurrogate(**pBegin) ?
     226   200232329 :                    combineSurrogates(nChar, *(*pBegin)++) : nChar;
     227             :     }
     228             : }
     229             : 
     230   130895626 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
     231             : {
     232             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     233   130895626 :     if (nUtf32 <= 0xFFFF) {
     234             :         writeUnicode(
     235   130895626 :             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
     236             :     } else {
     237           0 :         nUtf32 -= 0x10000;
     238             :         writeUnicode(
     239             :             pBuffer, pCapacity,
     240           0 :             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
     241             :         writeUnicode(
     242             :             pBuffer, pCapacity,
     243           0 :             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
     244             :     }
     245   130895626 : }
     246             : 
     247           0 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     248             :                       sal_uInt32 nOctet)
     249             : {
     250             :     assert(nOctet <= 0xFF); // bad octet
     251             : 
     252             :     static sal_Unicode const aHex[16]
     253             :         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
     254             :             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
     255             : 
     256           0 :     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
     257           0 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
     258           0 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
     259           0 : }
     260             : 
     261           0 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     262             :                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
     263             : {
     264             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     265           0 :     if (eCharset == RTL_TEXTENCODING_UTF8) {
     266           0 :         if (nUtf32 < 0x80)
     267           0 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
     268           0 :         else if (nUtf32 < 0x800)
     269             :         {
     270           0 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
     271           0 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     272             :         }
     273           0 :         else if (nUtf32 < 0x10000)
     274             :         {
     275           0 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
     276           0 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     277           0 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     278             :         }
     279             :         else
     280             :         {
     281           0 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
     282           0 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
     283           0 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     284           0 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     285             :         }
     286             :     } else {
     287             :         rtl_UnicodeToTextConverter aConverter
     288           0 :             = rtl_createUnicodeToTextConverter(eCharset);
     289             :         sal_Unicode aSrc[2];
     290             :         sal_Size nSrcSize;
     291           0 :         if (nUtf32 <= 0xFFFF)
     292             :         {
     293           0 :             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
     294           0 :             nSrcSize = 1;
     295             :         }
     296             :         else
     297             :         {
     298             :             aSrc[0] = static_cast< sal_Unicode >(
     299           0 :                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
     300             :             aSrc[1] = static_cast< sal_Unicode >(
     301           0 :                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
     302           0 :             nSrcSize = 2;
     303             :         }
     304             :         sal_Char aDst[32]; // FIXME  random value
     305             :         sal_uInt32 nInfo;
     306             :         sal_Size nConverted;
     307             :         sal_Size nDstSize = rtl_convertUnicodeToText(
     308             :             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
     309             :             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
     310             :             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
     311             :             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
     312           0 :             &nInfo, &nConverted);
     313             :         assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
     314           0 :         rtl_destroyUnicodeToTextConverter(aConverter);
     315           0 :         if (nInfo == 0) {
     316             :             assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
     317           0 :             for (sal_Size i = 0; i < nDstSize; ++i)
     318             :                 writeEscapeOctet(pBuffer, pCapacity,
     319           0 :                                  static_cast< unsigned char >(aDst[i]));
     320             :                     // FIXME  all octets are escaped, even if there is no need
     321             :         } else {
     322           0 :             if (bStrict) {
     323           0 :                 return false;
     324             :             } else {
     325           0 :                 writeUcs4(pBuffer, pCapacity, nUtf32);
     326             :             }
     327             :         }
     328             :     }
     329           0 :     return true;
     330             : }
     331             : 
     332             : struct Component
     333             : {
     334             :     sal_Unicode const * pBegin;
     335             :     sal_Unicode const * pEnd;
     336             : 
     337      213600 :     inline Component(): pBegin(0), pEnd(0) {}
     338             : 
     339      170876 :     inline bool isPresent() const { return pBegin != 0; }
     340             : 
     341             :     inline sal_Int32 getLength() const;
     342             : };
     343             : 
     344       85234 : inline sal_Int32 Component::getLength() const
     345             : {
     346             :     assert(isPresent()); // taking length of non-present component
     347       85234 :     return static_cast< sal_Int32 >(pEnd - pBegin);
     348             : }
     349             : 
     350       42720 : struct Components
     351             : {
     352             :     Component aScheme;
     353             :     Component aAuthority;
     354             :     Component aPath;
     355             :     Component aQuery;
     356             :     Component aFragment;
     357             : };
     358             : 
     359       42720 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
     360             : {
     361             :     // This algorithm is liberal and accepts various forms of illegal input.
     362             : 
     363       42720 :     sal_Unicode const * pBegin = pUriRef->buffer;
     364       42720 :     sal_Unicode const * pEnd = pBegin + pUriRef->length;
     365       42720 :     sal_Unicode const * pPos = pBegin;
     366             : 
     367       42720 :     if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
     368             :     {
     369      173949 :         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
     370             :         {
     371      173949 :             if (*p == ':')
     372             :             {
     373       42716 :                 pComponents->aScheme.pBegin = pBegin;
     374       42716 :                 pComponents->aScheme.pEnd = ++p;
     375       42716 :                 pPos = p;
     376       42716 :                 break;
     377             :             }
     378      263070 :             else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
     379      131837 :                      && *p != '.')
     380             :             {
     381           4 :                 break;
     382             :             }
     383             :         }
     384             :     }
     385             : 
     386       42720 :     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
     387             :     {
     388       42514 :         pComponents->aAuthority.pBegin = pPos;
     389       42514 :         pPos += 2;
     390       85028 :         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
     391           0 :             ++pPos;
     392       42514 :         pComponents->aAuthority.pEnd = pPos;
     393             :     }
     394             : 
     395       42720 :     pComponents->aPath.pBegin = pPos;
     396     2811893 :     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
     397     2726453 :         ++pPos;
     398       42720 :     pComponents->aPath.pEnd = pPos;
     399             : 
     400       42720 :     if (pPos != pEnd && *pPos == '?')
     401             :     {
     402           0 :         pComponents->aQuery.pBegin = pPos++;
     403           0 :         while (pPos != pEnd && * pPos != '#')
     404           0 :             ++pPos;
     405           0 :         pComponents->aQuery.pEnd = pPos;
     406             :     }
     407             : 
     408       42720 :     if (pPos != pEnd)
     409             :     {
     410             :         assert(*pPos == '#');
     411           0 :         pComponents->aFragment.pBegin = pPos;
     412           0 :         pComponents->aFragment.pEnd = pEnd;
     413             :     }
     414       42720 : }
     415             : 
     416       42720 : void appendPath(
     417             :     rtl::OUStringBuffer & buffer, sal_Int32 bufferStart, bool precedingSlash,
     418             :     sal_Unicode const * pathBegin, sal_Unicode const * pathEnd)
     419             : {
     420      468463 :     while (precedingSlash || pathBegin != pathEnd) {
     421      383023 :         sal_Unicode const * p = pathBegin;
     422     3152133 :         while (p != pathEnd && *p != '/') {
     423     2386087 :             ++p;
     424             :         }
     425      383023 :         std::size_t n = p - pathBegin;
     426      383023 :         if (n == 1 && pathBegin[0] == '.') {
     427             :             // input begins with "." -> remove from input (and done):
     428             :             //  i.e., !precedingSlash -> !precedingSlash
     429             :             // input begins with "./" -> remove from input:
     430             :             //  i.e., !precedingSlash -> !precedingSlash
     431             :             // input begins with "/." -> replace with "/" in input (and not yet
     432             :             // done):
     433             :             //  i.e., precedingSlash -> precedingSlash
     434             :             // input begins with "/./" -> replace with "/" in input:
     435             :             //  i.e., precedingSlash -> precedingSlash
     436      383023 :         } else if (n == 2 && pathBegin[0] == '.' && pathBegin[1] == '.') {
     437             :             // input begins with ".." -> remove from input (and done):
     438             :             //  i.e., !precedingSlash -> !precedingSlash
     439             :             // input begins with "../" -> remove from input
     440             :             //  i.e., !precedingSlash -> !precedingSlash
     441             :             // input begins with "/.." -> replace with "/" in input, and shrink
     442             :             // output (not not yet done):
     443             :             //  i.e., precedingSlash -> precedingSlash
     444             :             // input begins with "/../" -> replace with "/" in input, and shrink
     445             :             // output:
     446             :             //  i.e., precedingSlash -> precedingSlash
     447           0 :             if (precedingSlash) {
     448             :                 buffer.truncate(
     449             :                     bufferStart
     450             :                     + std::max<sal_Int32>(
     451             :                         rtl_ustr_lastIndexOfChar_WithLength(
     452           0 :                             buffer.getStr() + bufferStart,
     453           0 :                             buffer.getLength() - bufferStart, '/'),
     454           0 :                         0));
     455             :             }
     456             :         } else {
     457      383023 :             if (precedingSlash) {
     458      340307 :                 buffer.append('/');
     459             :             }
     460      383023 :             buffer.append(pathBegin, n);
     461      383023 :             precedingSlash = p != pathEnd;
     462             :         }
     463      383023 :         pathBegin = p + (p == pathEnd ? 0 : 1);
     464             :     }
     465       42720 : }
     466             : 
     467             : }
     468             : 
     469           0 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
     470             :     SAL_THROW_EXTERN_C()
     471             : {
     472             :     static sal_Bool const aCharClass[][nCharClassSize]
     473             :     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
     474             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     475             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
     476             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
     477             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
     478             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
     479             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
     480             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
     481             :        },
     482             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
     483             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     484             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
     485             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     486             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     487             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     488             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     489             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     490             :        },
     491             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
     492             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     493             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     494             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     495             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     496             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     497             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     498             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     499             :        },
     500             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
     501             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     502             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     503             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     504             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     505             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     506             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     507             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     508             :        },
     509             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
     510             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     511             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     512             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     513             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     514             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     515             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     516             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     517             :        },
     518             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
     519             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     520             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     521             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     522             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     523             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     524             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     525             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     526             :        },
     527             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
     528             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     529             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     530             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
     531             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     532             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     533             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     534             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     535             :        },
     536             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
     537             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     538             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
     539             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
     540             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     541             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     542             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     543             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     544             :        }};
     545             :     assert(
     546             :         (eCharClass >= 0
     547             :          && (sal::static_int_cast< std::size_t >(eCharClass)
     548             :              < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
     549           0 :     return aCharClass[eCharClass];
     550             : }
     551             : 
     552     1615453 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
     553             :                             rtl_UriEncodeMechanism eMechanism,
     554             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     555             :     SAL_THROW_EXTERN_C()
     556             : {
                           // Builds in *pResult a copy of pText in which every character that
                           // pCharClass does not accept (as decided by isValid) is emitted through
                           // writeEscapeChar instead of being copied verbatim.
                           //
                           // pText       input string; read from buffer[0] to buffer[length - 1].
                           // pCharClass  table consulted by isValid; entry 0x25 ('%') must be
                           //             false (asserted below).
                           // eMechanism  selects (a) whether escapes already present in the input
                           //             are recognised while reading (KeepEscapes, CheckEscapes,
                           //             StrictKeepEscapes) and (b) the flag handed to
                           //             writeEscapeChar (set for Strict, StrictKeepEscapes).
                           // eCharset    text encoding forwarded to readUcs4 and writeEscapeChar.
                           // pResult     receives the encoded string; if writeEscapeChar reports
                           //             failure it is reset with rtl_uString_new (presumably to
                           //             an empty string) and the function returns early.
     557             :     assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
     558             : 
     559     1615453 :     sal_Unicode const * p = pText->buffer;
     560     1615453 :     sal_Unicode const * pEnd = p + pText->length;
                           // Start with room for the unencoded length; nCapacity is passed by
                           // pointer to the write* helpers (presumably so they can grow the buffer
                           // behind *pResult -- confirm against their definitions).
     561     1615453 :     sal_Int32 nCapacity = pText->length;
     562     1615453 :     rtl_uString_new_WithLength(pResult, nCapacity);
     563     1615453 :     while (p < pEnd)
     564             :     {
                               // readUcs4 receives &p, so it is what advances the cursor.  Its
                               // third argument is true only for the mechanisms that keep or check
                               // existing escapes; eType reports how the returned value was
                               // obtained (see the cases below).
     565             :         EscapeType eType;
     566             :         sal_uInt32 nUtf32 = readUcs4(
     567             :             &p, pEnd,
     568             :             (eMechanism == rtl_UriEncodeKeepEscapes
     569    69336703 :              || eMechanism == rtl_UriEncodeCheckEscapes
     570   138673406 :              || eMechanism == rtl_UriEncodeStrictKeepEscapes),
     571   138673406 :             eCharset, &eType);
     572    69336703 :         switch (eType)
     573             :         {
                               // Value did not come from an escape sequence: copy it verbatim if
                               // the character class allows it, otherwise escape it.
     574             :         case EscapeNo:
     575    69336703 :             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     576             :                 writeUnicode(pResult, &nCapacity,
     577    69336703 :                              static_cast< sal_Unicode >(nUtf32));
     578           0 :             else if (!writeEscapeChar(
     579             :                          pResult, &nCapacity, nUtf32, eCharset,
     580             :                          (eMechanism == rtl_UriEncodeStrict
     581           0 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     582             :             {
     583           0 :                 rtl_uString_new(pResult);
     584           0 :                 return;
     585             :             }
     586    69336703 :             break;
     587             : 
                               // Value came from an escape sequence in the input.  Only under
                               // rtl_UriEncodeCheckEscapes is it written unescaped (and only when
                               // the character class accepts it); in every other case it is written
                               // back out as an escape.
     588             :         case EscapeChar:
     589           0 :             if (eMechanism == rtl_UriEncodeCheckEscapes
     590           0 :                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     591             :                 writeUnicode(pResult, &nCapacity,
     592           0 :                              static_cast< sal_Unicode >(nUtf32));
     593           0 :             else if (!writeEscapeChar(
     594             :                          pResult, &nCapacity, nUtf32, eCharset,
     595             :                          (eMechanism == rtl_UriEncodeStrict
     596           0 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     597             :             {
     598           0 :                 rtl_uString_new(pResult);
     599           0 :                 return;
     600             :             }
     601           0 :             break;
     602             : 
                               // Value is passed straight to writeEscapeOctet (presumably a single
                               // escaped octet that readUcs4 could not turn into a character --
                               // confirm against readUcs4).
     603             :         case EscapeOctet:
     604           0 :             writeEscapeOctet(pResult, &nCapacity, nUtf32);
     605           0 :             break;
     606             :         }
     607             :     }
                           // Convert the accumulated buffer into the final result string.
     608     1615453 :     *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
     609             : }
     610             : 
     611     2042182 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
     612             :                             rtl_UriDecodeMechanism eMechanism,
     613             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     614             :     SAL_THROW_EXTERN_C()
     615             : {
                           // Decodes the escape sequences in pText and stores the outcome in
                           // *pResult.
                           //
                           // eMechanism:
                           //   rtl_UriDecodeNone         *pResult is assigned pText unchanged.
                           //   rtl_UriDecodeToIuri       eCharset is overridden with UTF-8; decoded
                           //                             characters <= 0x7F are kept as escapes.
                           //   rtl_UriDecodeWithCharset  escapes are decoded using eCharset.
                           //   rtl_UriDecodeStrict       as WithCharset, but an EscapeOctet result
                           //                             from readUcs4 aborts: *pResult is reset
                           //                             with rtl_uString_new (presumably to an
                           //                             empty string) and the function returns.
     616     2042182 :     switch (eMechanism)
     617             :     {
     618             :     case rtl_UriDecodeNone:
     619           0 :         rtl_uString_assign(pResult, pText);
     620           0 :         break;
     621             : 
     622             :     case rtl_UriDecodeToIuri:
     623           0 :         eCharset = RTL_TEXTENCODING_UTF8;
     624             :         //fall-through
     625             :     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
     626             :         {
     627     2042182 :             sal_Unicode const * p = pText->buffer;
     628     2042182 :             sal_Unicode const * pEnd = p + pText->length;
                                   // Initial capacity is the input length; nCapacity is handed by
                                   // pointer to the write* helpers (presumably so they can grow
                                   // the buffer -- confirm against their definitions).
     629     2042182 :             sal_Int32 nCapacity = pText->length;
     630     2042182 :             rtl_uString_new_WithLength(pResult, nCapacity);
     631   134979990 :             while (p < pEnd)
     632             :             {
                                       // readUcs4 receives &p and therefore advances the cursor;
                                       // its third argument is always true here, unlike in
                                       // rtl_uriEncode where it depends on the mechanism.
     633             :                 EscapeType eType;
     634   130895626 :                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
     635   130895626 :                 switch (eType)
     636             :                 {
     637             :                 case EscapeChar:
                                           // In ToIuri mode, values up to 0x7F that were escaped
                                           // in the input are written back in escaped form.
     638           0 :                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
     639             :                     {
     640           0 :                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
     641           0 :                         break;
     642             :                     }
                                           // fall through (intentional): every other decoded
                                           // character is written exactly like an unescaped one
     643             :                 case EscapeNo:
     644   130895626 :                     writeUcs4(pResult, &nCapacity, nUtf32);
     645   130895626 :                     break;
     646             : 
     647             :                 case EscapeOctet:
                                           // Strict mode rejects the whole input here; the other
                                           // mechanisms write the value back out as an escape.
     648           0 :                     if (eMechanism == rtl_UriDecodeStrict) {
     649           0 :                         rtl_uString_new(pResult);
     650     2042182 :                         return;
     651             :                     }
     652           0 :                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
     653           0 :                     break;
     654             :                 }
     655             :             }
                                   // Convert the accumulated buffer into the final result string.
     656     2042182 :             *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
     657             :         }
     658     2042182 :         break;
     659             :     }
     660             : }
     661             : 
     662       42716 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
     663             :                                          rtl_uString * pRelUriRef,
     664             :                                          rtl_uString ** pResult,
     665             :                                          rtl_uString ** pException)
     666             :     SAL_THROW_EXTERN_C()
     667             : {
                           // Resolves pRelUriRef against pBaseUriRef and stores the resulting URI
                           // reference in *pResult.
                           //
                           // Returns true on success.  Returns false, with a message assigned to
                           // *pException and *pResult left untouched, when pRelUriRef has no scheme
                           // and pBaseUriRef has none either.  pBaseUriRef is only parsed when
                           // pRelUriRef lacks a scheme.
                           //
                           // Components are appended to the buffer exactly as parseUriRef delimits
                           // them (presumably including their ":", "//", "?" and "#" delimiters,
                           // since no delimiter is added here -- confirm against parseUriRef).
     668             :     // Use the strict parser algorithm from RFC 3986, section 5.2, to turn the
     669             :     // relative URI into an absolute one:
     670       42716 :     rtl::OUStringBuffer aBuffer;
     671       42716 :     Components aRelComponents;
     672       42716 :     parseUriRef(pRelUriRef, &aRelComponents);
     673       42716 :     if (aRelComponents.aScheme.isPresent())
     674             :     {
                               // The reference has its own scheme: the base is ignored and the
                               // result is the reference itself, with its path run through
                               // appendPath.
     675             :         aBuffer.append(aRelComponents.aScheme.pBegin,
     676       42712 :                        aRelComponents.aScheme.getLength());
     677       42712 :         if (aRelComponents.aAuthority.isPresent())
     678             :             aBuffer.append(aRelComponents.aAuthority.pBegin,
     679       42510 :                            aRelComponents.aAuthority.getLength());
     680             :         appendPath(
     681             :             aBuffer, aBuffer.getLength(), false, aRelComponents.aPath.pBegin,
     682       42712 :             aRelComponents.aPath.pEnd);
     683       42712 :         if (aRelComponents.aQuery.isPresent())
     684             :             aBuffer.append(aRelComponents.aQuery.pBegin,
     685           0 :                            aRelComponents.aQuery.getLength());
     686             :     }
     687             :     else
     688             :     {
     689           4 :         Components aBaseComponents;
     690           4 :         parseUriRef(pBaseUriRef, &aBaseComponents);
                               // A base without a scheme cannot yield an absolute URI: report the
                               // problem through *pException and fail.
     691           4 :         if (!aBaseComponents.aScheme.isPresent())
     692             :         {
     693             :             rtl_uString_assign(
     694             :                 pException,
     695             :                 (rtl::OUString(
     696           0 :                     "<" + rtl::OUString(pBaseUriRef)
     697           0 :                     + "> does not start with a scheme component")
     698           0 :                  .pData));
     699           0 :             return false;
     700             :         }
                               // From here on the scheme always comes from the base.
     701             :         aBuffer.append(aBaseComponents.aScheme.pBegin,
     702           4 :                        aBaseComponents.aScheme.getLength());
     703           4 :         if (aRelComponents.aAuthority.isPresent())
     704             :         {
                                   // Reference has an authority: authority, path and query are all
                                   // taken from the reference.
     705             :             aBuffer.append(aRelComponents.aAuthority.pBegin,
     706           0 :                            aRelComponents.aAuthority.getLength());
     707             :             appendPath(
     708             :                 aBuffer, aBuffer.getLength(), false,
     709           0 :                 aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     710           0 :             if (aRelComponents.aQuery.isPresent())
     711             :                 aBuffer.append(aRelComponents.aQuery.pBegin,
     712           0 :                                aRelComponents.aQuery.getLength());
     713             :         }
     714             :         else
     715             :         {
                                   // No authority in the reference: use the base's, if it has one.
     716           4 :             if (aBaseComponents.aAuthority.isPresent())
     717             :                 aBuffer.append(aBaseComponents.aAuthority.pBegin,
     718           4 :                                aBaseComponents.aAuthority.getLength());
     719           4 :             if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd)
     720             :             {
                                       // Empty reference path: keep the base path as is (it is
                                       // appended directly, not via appendPath) and prefer the
                                       // reference's query over the base's.
     721             :                 aBuffer.append(aBaseComponents.aPath.pBegin,
     722           0 :                                aBaseComponents.aPath.getLength());
     723           0 :                 if (aRelComponents.aQuery.isPresent())
     724             :                     aBuffer.append(aRelComponents.aQuery.pBegin,
     725           0 :                                    aRelComponents.aQuery.getLength());
     726           0 :                 else if (aBaseComponents.aQuery.isPresent())
     727             :                     aBuffer.append(aBaseComponents.aQuery.pBegin,
     728           0 :                                    aBaseComponents.aQuery.getLength());
     729             :             }
     730             :             else
     731             :             {
                                       // Non-empty reference path.  (The pBegin != pEnd test below
                                       // is already guaranteed by this else branch; it only guards
                                       // the dereference.)  A path starting with '/' replaces the
                                       // base path entirely.
     732           4 :                 if (aRelComponents.aPath.pBegin != aRelComponents.aPath.pEnd
     733           4 :                     && *aRelComponents.aPath.pBegin == '/')
     734             :                     appendPath(
     735             :                         aBuffer, aBuffer.getLength(), false,
     736           0 :                         aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
                                       // Base has an authority but an empty path: the reference
                                       // path is appended with the third appendPath argument set
                                       // (presumably requesting a "/" in front of it, as in the
                                       // RFC 3986 section 5.2.3 merge rule -- confirm against
                                       // appendPath).
     737           8 :                 else if (aBaseComponents.aAuthority.isPresent()
     738           8 :                          && aBaseComponents.aPath.pBegin
     739           4 :                             == aBaseComponents.aPath.pEnd)
     740             :                     appendPath(
     741             :                         aBuffer, aBuffer.getLength(), true,
     742           0 :                         aRelComponents.aPath.pBegin, aRelComponents.aPath.pEnd);
     743             :                 else
     744             :                 {
                                           // General merge: the base path up to, but excluding, its
                                           // last '/' is appended first (skipped when the base path
                                           // contains no '/'), followed by the reference path.
                                           // Both calls receive the same buffer offset n, taken
                                           // before any path text is appended.
     745           4 :                     sal_Int32 n = aBuffer.getLength();
     746             :                     sal_Int32 i = rtl_ustr_lastIndexOfChar_WithLength(
     747             :                         aBaseComponents.aPath.pBegin,
     748           4 :                         aBaseComponents.aPath.getLength(), '/');
     749           4 :                     if (i >= 0) {
     750             :                         appendPath(
     751             :                             aBuffer, n, false, aBaseComponents.aPath.pBegin,
     752           4 :                             aBaseComponents.aPath.pBegin + i);
     753             :                     }
     754             :                     appendPath(
     755             :                         aBuffer, n, i >= 0, aRelComponents.aPath.pBegin,
     756           4 :                         aRelComponents.aPath.pEnd);
     757             :                 }
                                       // With a non-empty reference path only the reference's
                                       // query is used; the base query is dropped.
     758           4 :                 if (aRelComponents.aQuery.isPresent())
     759             :                     aBuffer.append(aRelComponents.aQuery.pBegin,
     760           0 :                                    aRelComponents.aQuery.getLength());
     761             :             }
     762             :         }
     763             :     }
                           // The fragment always comes from the reference, never from the base.
     764       42716 :     if (aRelComponents.aFragment.isPresent())
     765             :         aBuffer.append(aRelComponents.aFragment.pBegin,
     766           0 :                        aRelComponents.aFragment.getLength());
     767       42716 :     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
     768       42716 :     return true;
     769             : }
     770             : 
     771             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10