LCOV - code coverage report
Current view: top level - libreoffice/sal/rtl/source - uri.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 285 290 98.3 %
Date: 2012-12-17 Functions: 22 22 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "surrogates.hxx"
      21             : 
      22             : #include "osl/diagnose.h"
      23             : #include "rtl/strbuf.hxx"
      24             : #include "rtl/textenc.h"
      25             : #include "rtl/textcvt.h"
      26             : #include "rtl/uri.h"
      27             : #include "rtl/ustrbuf.h"
      28             : #include "rtl/ustrbuf.hxx"
      29             : #include "rtl/ustring.h"
      30             : #include "rtl/ustring.hxx"
      31             : #include "sal/types.h"
      32             : #include "sal/macros.h"
      33             : 
      34             : #include <cstddef>
      35             : 
      36             : namespace {
      37             : 
      38             : std::size_t const nCharClassSize = 128; // number of entries in one character-class table (code points 0x00--0x7F)
      39             : 
      40             : sal_Unicode const cEscapePrefix = 0x25; // '%', starts a "%XX" escape sequence
      41             : 
      42       38594 : inline bool isDigit(sal_uInt32 nUtf32) // true iff nUtf32 is an ASCII decimal digit
      43             : {
      44       38594 :     return nUtf32 >= 0x30 && nUtf32 <= 0x39; // '0'--'9'
      45             : }
      46             : 
      47      289388 : inline bool isAlpha(sal_uInt32 nUtf32) // true iff nUtf32 is an ASCII letter; non-ASCII letters are not matched
      48             : {
      49             :     // 'A'--'Z', 'a'--'z'
      50             :     return (
      51             :             (nUtf32 >= 0x41 && nUtf32 <= 0x5A) ||
      52             :             (nUtf32 >= 0x61 && nUtf32 <= 0x7A)
      53      289388 :            );
      54             : }
      55             : 
      56    48654094 : inline bool isHighSurrogate(sal_uInt32 nUtf16) // typed wrapper around the SAL_RTL_IS_HIGH_SURROGATE macro
      57             : {
      58    48654094 :     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16); // macro is defined outside this file (presumably surrogates.hxx -- confirm)
      59             : }
      60             : 
      61          44 : inline bool isLowSurrogate(sal_uInt32 nUtf16) // typed wrapper around the SAL_RTL_IS_LOW_SURROGATE macro
      62             : {
      63          44 :     return SAL_RTL_IS_LOW_SURROGATE(nUtf16); // macro is defined outside this file (presumably surrogates.hxx -- confirm)
      64             : }
      65             : 
      66          10 : inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low) // merges a surrogate pair; callers here use the result as one UCS4 value
      67             : {
      68          10 :     return SAL_RTL_COMBINE_SURROGATES(high, low); // macro is defined outside this file; no validation of high/low is done here
      69             : }
      70             : 
      71       80490 : inline int getHexWeight(sal_uInt32 nUtf32) // value 0..15 of an ASCII hex digit (either case), or -1 if not a hex digit
      72             : {
      73             :     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
      74             :                static_cast< int >(nUtf32 - 0x30) :
      75             :            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
      76             :                static_cast< int >(nUtf32 - 0x41 + 10) :
      77             :            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
      78             :                static_cast< int >(nUtf32 - 0x61 + 10) :
      79       80490 :                -1; // not a hex digit
      80             : }
      81             : 
      82     8491564 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32) // true iff nUtf32 is flagged in the table pCharClass
      83             : {
      84     8491564 :     return nUtf32 < nCharClassSize && pCharClass[nUtf32]; // range check first, so values >= nCharClassSize never index the table
      85             : }
      86             : 
      87    48773601 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity, // appends one UTF-16 code unit to the string buffer *pBuffer
      88             :                          sal_Unicode cChar)
      89             : {
      90    48773601 :     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1); // insert position is the current length, i.e. the end
      91    48773601 : }
      92             : 
      93             : enum EscapeType // reported by readUcs4: how the returned value was obtained
      94             : {
      95             :     EscapeNo, // input was taken literally, no escape sequence was decoded
      96             :     EscapeChar, // one or more %XX escape sequences were decoded into a character
      97             :     EscapeOctet // a single %XX escape sequence is reported as its raw octet value
      98             : };
      99             : 
     100             : /* Read any of the following (advancing *pBegin past what was read, and
     101             :    storing in *pType how the returned value was obtained):
     102             :    - sequence of escape sequences representing character from eCharset,
     103             :      translated to single UCS4 character; or
     104             : 
     105             :    - pair of UTF-16 surrogates, translated to single UCS4 character; or
     106             : 
     107             :    - single UTF-16 character, extended to UCS4 character.
     108             :  */
     109    48694195 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd, // [*pBegin, pEnd) is the unread input
     110             :                     bool bEncoded, rtl_TextEncoding eCharset, // escape sequences are only decoded when bEncoded is true
     111             :                     EscapeType * pType) // out: always set before returning
     112             : {
     113    48694195 :     sal_uInt32 nChar = *(*pBegin)++; // consumes one unit; no check against pEnd is made here, so the input must be non-empty
     114             :     int nWeight1;
     115             :     int nWeight2;
     116    48774479 :     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2 // '%' followed by at least two more units ...
     117       40143 :         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0 // ... both of which are hex digits
     118       40141 :         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
     119             :     {
     120       40141 :         *pBegin += 2; // skip the two hex digits
     121       40141 :         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2); // value of the escaped octet
     122       40141 :         if (nChar <= 0x7F) // octets up to 0x7F are taken as characters regardless of eCharset
     123       40043 :             *pType = EscapeChar;
     124          98 :         else if (eCharset == RTL_TEXTENCODING_UTF8)
     125             :         {
     126          78 :             if (nChar >= 0xC0 && nChar <= 0xF4) // only these octets are accepted as the start of a multi-octet sequence
     127             :             {
     128             :                 sal_uInt32 nEncoded; // code point assembled so far
     129             :                 int nShift; // bit position for the payload of the next continuation octet
     130             :                 sal_uInt32 nMin; // smallest code point accepted for this sequence length
     131          44 :                 if (nChar <= 0xDF) // one continuation octet expected
     132             :                 {
     133           8 :                     nEncoded = (nChar & 0x1F) << 6;
     134           8 :                     nShift = 0;
     135           8 :                     nMin = 0x80;
     136             :                 }
     137          36 :                 else if (nChar <= 0xEF) // two continuation octets expected
     138             :                 {
     139          32 :                     nEncoded = (nChar & 0x0F) << 12;
     140          32 :                     nShift = 6;
     141          32 :                     nMin = 0x800;
     142             :                 }
     143             :                 else // three continuation octets expected
     144             :                 {
     145           4 :                     nEncoded = (nChar & 0x07) << 18;
     146           4 :                     nShift = 12;
     147           4 :                     nMin = 0x10000;
     148             :                 }
     149          44 :                 sal_Unicode const * p = *pBegin; // look-ahead cursor; *pBegin is only updated if the whole sequence is accepted
     150          44 :                 bool bUTF8 = true;
     151         124 :                 for (; nShift >= 0; nShift -= 6) // one iteration per expected continuation octet
     152             :                 {
     153         244 :                     if (pEnd - p < 3 || p[0] != cEscapePrefix // each continuation must itself be written as "%XX" ...
     154          82 :                         || (nWeight1 = getHexWeight(p[1])) < 8 // ... with a first hex digit in 8..B, i.e. an octet 0x80--0xBF
     155             :                         || nWeight1 > 11
     156          80 :                         || (nWeight2 = getHexWeight(p[2])) < 0)
     157             :                     {
     158           2 :                         bUTF8 = sal_False;
     159           2 :                         break;
     160             :                     }
     161          80 :                     p += 3;
     162          80 :                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift; // merge the low six bits of the continuation octet
     163             :                 }
     164          74 :                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded) // rejects results below nMin and surrogate values ...
     165          30 :                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF) // ... and values above 0x10FFFF
     166             :                 {
     167          26 :                     *pBegin = p; // commit: consume the continuation escapes too
     168          26 :                     *pType = EscapeChar;
     169          26 :                     return nEncoded;
     170             :                 }
     171             :             }
     172          52 :             *pType = EscapeOctet; // not accepted as UTF-8: only the first escape has been consumed, its octet is returned below
     173             :         }
     174             :         else
     175             :         {
     176          20 :             rtl::OStringBuffer aBuf; // octets collected so far for conversion from eCharset
     177          20 :             aBuf.append(static_cast< char >(nChar));
     178             :             rtl_TextToUnicodeConverter aConverter
     179          20 :                 = rtl_createTextToUnicodeConverter(eCharset);
     180          20 :             sal_Unicode const * p = *pBegin; // look-ahead cursor; *pBegin is only updated on successful conversion
     181          28 :             for (;;) // retry the conversion, adding one more octet per iteration, until it succeeds or cannot continue
     182             :             {
     183             :                 sal_Unicode aDst[2];
     184             :                 sal_uInt32 nInfo;
     185             :                 sal_Size nConverted;
     186             :                 sal_Size nDstSize = rtl_convertTextToUnicode(
     187          48 :                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
     188             :                     SAL_N_ELEMENTS( aDst ),
     189             :                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
     190             :                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
     191             :                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
     192          48 :                     &nInfo, &nConverted);
     193          48 :                 if (nInfo == 0) // no info flags set: treated as a complete, successful conversion
     194             :                 {
     195             :                     assert( nConverted
     196             :                         == sal::static_int_cast< sal_uInt32 >(
     197             :                             aBuf.getLength()));
     198          16 :                     rtl_destroyTextToUnicodeConverter(aConverter);
     199          16 :                     *pBegin = p; // commit everything that was fed into aBuf
     200          16 :                     *pType = EscapeChar;
     201             :                     assert( nDstSize == 1
     202             :                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
     203             :                             && isLowSurrogate(aDst[1])));
     204             :                     return nDstSize == 1
     205          16 :                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
     206             :                 }
     207          76 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL // more input needed and the next input is another "%XX" escape
     208             :                          && pEnd - p >= 3 && p[0] == cEscapePrefix
     209          22 :                          && (nWeight1 = getHexWeight(p[1])) >= 0
     210          22 :                          && (nWeight2 = getHexWeight(p[2])) >= 0)
     211             :                 {
     212          22 :                     p += 3;
     213          22 :                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
     214             :                 }
     215          10 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL // more input needed and the next unit is an unescaped value <= 0x7F
     216             :                          && p != pEnd && *p <= 0x7F)
     217             :                 {
     218           6 :                     aBuf.append(static_cast< char >(*p++)); // the unescaped unit is used directly as the next octet
     219             :                 }
     220             :                 else // conversion error, or no usable further input: give up
     221             :                 {
     222             :                     assert(
     223             :                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
     224             :                         == 0);
     225             :                     break;
     226             :                 }
     227             :             }
     228           4 :             rtl_destroyTextToUnicodeConverter(aConverter);
     229           4 :             *pType = EscapeOctet; // *pBegin was not advanced past the look-ahead; only the first escape counts as read
     230             :         }
     231       40099 :         return nChar; // the value of the first escaped octet
     232             :     }
     233             :     else
     234             :     {
     235    48654054 :         *pType = EscapeNo;
     236    48654054 :         return isHighSurrogate(nChar) && *pBegin < pEnd // a high surrogate directly followed by a low surrogate is read as a pair
     237          14 :                && isLowSurrogate(**pBegin) ?
     238    48654068 :                    combineSurrogates(nChar, *(*pBegin)++) : nChar; // otherwise the single unit is returned unchanged
     239             :     }
     240             : }
     241             : 
     242    40202573 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32) // appends nUtf32 to *pBuffer as one or two UTF-16 units
     243             : {
     244             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     245    40202573 :     if (nUtf32 <= 0xFFFF) { // fits in a single UTF-16 unit
     246             :         writeUnicode(
     247    40202567 :             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
     248             :     } else {
     249           6 :         nUtf32 -= 0x10000; // remaining value is split into two 10-bit halves
     250             :         writeUnicode(
     251             :             pBuffer, pCapacity,
     252           6 :             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800)); // upper half, written first
     253             :         writeUnicode(
     254             :             pBuffer, pCapacity,
     255           6 :             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00)); // lower half
     256             :     }
     257    40202573 : }
     258             : 
     259       39673 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity, // appends nOctet to *pBuffer as '%' plus two hex digits
     260             :                       sal_uInt32 nOctet)
     261             : {
     262             :     assert(nOctet <= 0xFF); // bad octet
     263             : 
     264             :     static sal_Unicode const aHex[16]
     265             :         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
     266             :             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
     267             : 
     268       39673 :     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
     269       39673 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]); // high nibble; hex digits are always written in upper case
     270       39673 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]); // low nibble
     271       39673 : }
     272             : 
     273       39563 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity, // appends nUtf32, encoded in eCharset, as %XX escapes
     274             :                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict) // returns false only if bStrict and the conversion fails
     275             : {
     276             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     277       39563 :     if (eCharset == RTL_TEXTENCODING_UTF8) { // UTF-8 is encoded by hand here, without a text converter
     278       11363 :         if (nUtf32 < 0x80) // one octet
     279       11337 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
     280          26 :         else if (nUtf32 < 0x800) // two octets
     281             :         {
     282           6 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
     283           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     284             :         }
     285          20 :         else if (nUtf32 < 0x10000) // three octets
     286             :         {
     287          14 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
     288          14 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     289          14 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     290             :         }
     291             :         else // four octets
     292             :         {
     293           6 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
     294           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
     295           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     296           6 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     297             :         }
     298             :     } else {
     299             :         rtl_UnicodeToTextConverter aConverter
     300       28200 :             = rtl_createUnicodeToTextConverter(eCharset);
     301             :         sal_Unicode aSrc[2]; // nUtf32 as UTF-16: one unit, or a surrogate pair
     302             :         sal_Size nSrcSize;
     303       28200 :         if (nUtf32 <= 0xFFFF)
     304             :         {
     305       28198 :             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
     306       28198 :             nSrcSize = 1;
     307             :         }
     308             :         else
     309             :         {
     310             :             aSrc[0] = static_cast< sal_Unicode >(
     311           2 :                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
     312             :             aSrc[1] = static_cast< sal_Unicode >(
     313           2 :                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
     314           2 :             nSrcSize = 2;
     315             :         }
     316             :         sal_Char aDst[32]; // FIXME  random value
     317             :         sal_uInt32 nInfo;
     318             :         sal_Size nConverted;
     319             :         sal_Size nDstSize = rtl_convertUnicodeToText(
     320             :             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
     321             :             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
     322             :             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
     323             :             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
     324       28200 :             &nInfo, &nConverted);
     325             :         assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
     326       28200 :         rtl_destroyUnicodeToTextConverter(aConverter); // converter is released before any of the return paths below
     327       28200 :         if (nInfo == 0) { // no info flags set: treated as a successful conversion
     328             :             assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
     329       56400 :             for (sal_Size i = 0; i < nDstSize; ++i)
     330             :                 writeEscapeOctet(pBuffer, pCapacity,
     331       28206 :                                  static_cast< unsigned char >(aDst[i])); // cast keeps octets >= 0x80 in the range 0..0xFF
     332             :                     // FIXME  all octets are escaped, even if there is no need
     333             :         } else {
     334           6 :             if (bStrict) {
     335           4 :                 return false; // nothing has been written to *pBuffer in this case
     336             :             } else {
     337           2 :                 writeUcs4(pBuffer, pCapacity, nUtf32); // non-strict: the character is written unescaped instead
     338             :             }
     339             :         }
     340             :     }
     341       39559 :     return true;
     342             : }
     343             : 
     344             : struct Component // one part of a URI reference, as the half-open range [pBegin, pEnd) into the parsed string
     345             : {
     346             :     sal_Unicode const * pBegin; // null when the component is not present
     347             :     sal_Unicode const * pEnd;
     348             : 
     349      171140 :     inline Component(): pBegin(0), pEnd(0) {} // starts out as "not present"
     350             : 
     351       89188 :     inline bool isPresent() const { return pBegin != 0; } // an empty but present component has pBegin == pEnd != 0
     352             : 
     353             :     inline sal_Int32 getLength() const; // defined after the struct
     354             : };
     355             : 
     356       54972 : inline sal_Int32 Component::getLength() const // number of UTF-16 units in the component; only meaningful if isPresent()
     357             : {
     358             :     assert(isPresent()); // taking length of non-present component
     359       54972 :     return static_cast< sal_Int32 >(pEnd - pBegin); // pointer difference narrowed to sal_Int32
     360             : }
     361             : 
     362       34228 : struct Components // result of parseUriRef; every member is default-constructed as "not present"
     363             : {
     364             :     Component aScheme; // when present, includes the trailing ':'
     365             :     Component aAuthority; // when present, includes the leading "//"
     366             :     Component aPath; // always set by parseUriRef, possibly empty
     367             :     Component aQuery; // when present, includes the leading '?'
     368             :     Component aFragment; // when present, includes the leading '#'
     369             : };
     370             : 
     371       34228 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents) // splits *pUriRef into scheme, authority, path, query, fragment
     372             : {
     373             :     // This algorithm is liberal and accepts various forms of illegal input.
     374             : 
     375       34228 :     sal_Unicode const * pBegin = pUriRef->buffer;
     376       34228 :     sal_Unicode const * pEnd = pBegin + pUriRef->length;
     377       34228 :     sal_Unicode const * pPos = pBegin; // start of the part not yet assigned to a component
     378             : 
     379       34228 :     if (pPos != pEnd && isAlpha(*pPos)) // a scheme has to start with an ASCII letter
     380             :     {
     381      280395 :         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
     382             :         {
     383      275645 :             if (*p == ':')
     384             :             {
     385       20483 :                 pComponents->aScheme.pBegin = pBegin;
     386       20483 :                 pComponents->aScheme.pEnd = ++p; // the ':' is part of the scheme component
     387       20483 :                 pPos = p;
     388       20483 :                 break;
     389             :             }
     390      255162 :             else if (!isAlpha(*p) && !isDigit(*p) && *p != '+' && *p != '-' // any other character before a ':' means there is no scheme
     391             :                      && *p != '.')
     392             :             {
     393        8927 :                 break;
     394             :             }
     395             :         }
     396             :     }
     397             : 
     398       34228 :     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/') // "//" introduces an authority
     399             :     {
     400       15001 :         pComponents->aAuthority.pBegin = pPos; // the "//" is part of the authority component
     401       15001 :         pPos += 2;
     402       30090 :         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
     403          88 :             ++pPos;
     404       15001 :         pComponents->aAuthority.pEnd = pPos;
     405             :     }
     406             : 
     407       34228 :     pComponents->aPath.pBegin = pPos; // the path is always marked present, even if it turns out empty
     408     1421019 :     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
     409     1352563 :         ++pPos;
     410       34228 :     pComponents->aPath.pEnd = pPos;
     411             : 
     412       34228 :     if (pPos != pEnd && *pPos == '?')
     413             :     {
     414          96 :         pComponents->aQuery.pBegin = pPos++; // the '?' is part of the query component
     415         306 :         while (pPos != pEnd && * pPos != '#')
     416         114 :             ++pPos;
     417          96 :         pComponents->aQuery.pEnd = pPos;
     418             :     }
     419             : 
     420       34228 :     if (pPos != pEnd) // whatever is left can only start with '#'
     421             :     {
     422             :         assert(*pPos == '#');
     423          12 :         pComponents->aFragment.pBegin = pPos; // the '#' is part of the fragment component
     424          12 :         pComponents->aFragment.pEnd = pEnd;
     425             :     }
     426       34228 : }
     427             : 
     428       13727 : rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath) // merges a relative path onto an absolute base path
     429             : {
     430             :     assert(rBasePath.isPresent() && *rBasePath.pBegin == '/');
     431             :     assert(rRelPath.isPresent());
     432             : 
     433             :     // The invariant of aBuffer is that it always starts and ends with a slash
     434             :     // (until probably right at the end of the algorithm, when the last segment
     435             :     // of rRelPath is added, which does not necessarily end in a slash):
     436       13727 :     rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
     437             :         // XXX  numeric overflow
     438             : 
     439             :     // Segments "." and ".." within rBasePath are not considered special (but
     440             :     // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
     441             :     // bit unclear about this point:
     442       13727 :     sal_Int32 nFixed = 1; // length of the buffer prefix that ".." segments of rRelPath must not remove
     443       13727 :     sal_Unicode const * p = rBasePath.pBegin + 1; // start of the current base path segment
     444      889523 :     for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
     445      875796 :         if (*q == '/')
     446             :         {
     447       96144 :             if (
     448             :                 (q - p == 1 && p[0] == '.') ||
     449          18 :                 (q - p == 2 && p[0] == '.' && p[1] == '.')
     450             :                )
     451             :             {
     452          20 :                 nFixed = q + 1 - rBasePath.pBegin; // protect everything up to and including this "." or ".." segment
     453             :             }
     454       96126 :             p = q + 1;
     455             :         }
     456       13727 :     aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin); // base path up to its last slash; the final segment is dropped
     457             : 
     458       13727 :     p = rRelPath.pBegin;
     459       13727 :     if (p != rRelPath.pEnd)
     460          88 :         for (;;) // one iteration per segment of rRelPath
     461             :         {
     462       13813 :             sal_Unicode const * q = p; // becomes the end of the segment text (excluding any slash)
     463             :             sal_Unicode const * r; // becomes the end of the segment including its trailing slash, if any
     464      179764 :             for (;;)
     465             :             {
     466      193577 :                 if (q == rRelPath.pEnd)
     467             :                 {
     468       13725 :                     r = q;
     469       13725 :                     break;
     470             :                 }
     471      179852 :                 if (*q == '/')
     472             :                 {
     473          88 :                     r = q + 1;
     474          88 :                     break;
     475             :                 }
     476      179764 :                 ++q;
     477             :             }
     478       13813 :             if (q - p == 2 && p[0] == '.' && p[1] == '.')
     479             :             {
     480             :                 // Erroneous excess segments ".." within rRelPath are left
     481             :                 // intact, as the examples in RFC 2396, section C.2, suggest:
     482          48 :                 sal_Int32 i = aBuffer.getLength() - 1; // index of the buffer's trailing slash
     483          48 :                 if (i < nFixed) // nothing removable is left
     484             :                 {
     485          12 :                     aBuffer.append(p, r - p);
     486          12 :                     nFixed += 3; // extend the protected prefix over the ".." just appended
     487             :                 }
     488             :                 else
     489             :                 {
     490         116 :                     while (i > 0 && aBuffer[i - 1] != '/') // walk back to just after the previous slash
     491          44 :                         --i;
     492          36 :                     aBuffer.setLength(i); // drops the last segment of the buffer
     493          48 :                 }
     494             :             }
     495       13765 :             else if (q - p != 1 || *p != '.') // "." segments are skipped, everything else is copied
     496       13729 :                 aBuffer.append(p, r - p);
     497       13813 :             if (q == rRelPath.pEnd)
     498       13725 :                 break;
     499          88 :             p = q + 1;
     500             :         }
     501             : 
     502       13727 :     return aBuffer.makeStringAndClear();
     503             : }
     504             : 
     505             : }
     506             : 
     507        1334 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
     508             :     SAL_THROW_EXTERN_C()
     509             : {
     510             :     static sal_Bool const aCharClass[][nCharClassSize]
     511             :     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
     512             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     513             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
     514             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
     515             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
     516             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
     517             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
     518             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
     519             :        },
     520             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
     521             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     522             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
     523             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     524             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     525             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     526             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     527             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     528             :        },
     529             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
     530             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     531             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     532             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     533             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     534             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     535             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     536             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     537             :        },
     538             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
     539             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     540             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     541             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     542             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     543             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     544             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     545             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     546             :        },
     547             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
     548             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     549             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     550             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     551             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     552             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     553             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     554             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     555             :        },
     556             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
     557             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     558             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     559             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     560             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     561             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     562             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     563             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     564             :        },
     565             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
     566             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     567             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     568             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
     569             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     570             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     571             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     572             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     573             :        },
     574             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
     575             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     576             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
     577             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
     578             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     579             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     580             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     581             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     582             :        }};
     583             :     assert(
     584             :         (eCharClass >= 0
     585             :          && (sal::static_int_cast< std::size_t >(eCharClass)
     586             :              < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
     587        1334 :     return aCharClass[eCharClass];
     588             : }
     589             : 
                       // Percent-encodes pText and stores the outcome in *pResult.
                       //
                       // pText       input string; its buffer is scanned from start to end.
                       // pCharClass  table indexed by character code; isValid() consults it to
                       //             decide whether a character is copied unescaped.  The entry
                       //             for '%' (0x25) must be false, see the assert below.
                       // eMechanism  selects the flag passed to readUcs4(), whether an escaped
                       //             character may be written back unescaped
                       //             (rtl_UriEncodeCheckEscapes), and the last argument of
                       //             writeEscapeChar() (the two Strict mechanisms).
                       // eCharset    text encoding handed to readUcs4() and writeEscapeChar().
                       // pResult     receives the encoded string.  If writeEscapeChar() returns
                       //             false, *pResult is reset with rtl_uString_new() and the
                       //             function returns early.
     590      154709 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
     591             :                             rtl_UriEncodeMechanism eMechanism,
     592             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     593             :     SAL_THROW_EXTERN_C()
     594             : {
     595             :     assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
     596             : 
                           // *pResult is created with the input length as initial capacity;
                           // nCapacity is then passed by address to every write helper.
     597      154709 :     sal_Unicode const * p = pText->buffer;
     598      154709 :     sal_Unicode const * pEnd = p + pText->length;
     599      154709 :     sal_Int32 nCapacity = pText->length;
     600      154709 :     rtl_uString_new_WithLength(pResult, nCapacity);
     601      154709 :     while (p < pEnd)
     602             :     {
                               // readUcs4() advances p and reports through eType how the value
                               // was obtained.  The boolean is true only for the three
                               // mechanisms listed here -- presumably it enables recognition
                               // of escape sequences already present in the input; confirm
                               // against readUcs4().
     603             :         EscapeType eType;
     604             :         sal_uInt32 nUtf32 = readUcs4(
     605             :             &p, pEnd,
     606             :             (eMechanism == rtl_UriEncodeKeepEscapes
     607             :              || eMechanism == rtl_UriEncodeCheckEscapes
     608             :              || eMechanism == rtl_UriEncodeStrictKeepEscapes),
     609     8491574 :             eCharset, &eType);
     610     8491574 :         switch (eType)
     611             :         {
                               // Copy the character if the class table allows it, otherwise
                               // escape it; a failed escape aborts with an empty result.
     612             :         case EscapeNo:
     613     8491558 :             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     614             :                 writeUnicode(pResult, &nCapacity,
     615     8452003 :                              static_cast< sal_Unicode >(nUtf32));
     616       39555 :             else if (!writeEscapeChar(
     617             :                          pResult, &nCapacity, nUtf32, eCharset,
     618             :                          (eMechanism == rtl_UriEncodeStrict
     619       39555 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     620             :             {
     621           4 :                 rtl_uString_new(pResult);
     622             :                 return;
     623             :             }
     624     8491554 :             break;
     625             : 
                               // Only rtl_UriEncodeCheckEscapes writes the value unescaped,
                               // and only if the class table allows it; every other case is
                               // escaped again via writeEscapeChar().
     626             :         case EscapeChar:
     627          14 :             if (eMechanism == rtl_UriEncodeCheckEscapes
     628           6 :                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     629             :                 writeUnicode(pResult, &nCapacity,
     630           0 :                              static_cast< sal_Unicode >(nUtf32));
     631           8 :             else if (!writeEscapeChar(
     632             :                          pResult, &nCapacity, nUtf32, eCharset,
     633             :                          (eMechanism == rtl_UriEncodeStrict
     634           8 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     635             :             {
     636           0 :                 rtl_uString_new(pResult);
     637             :                 return;
     638             :             }
     639           8 :             break;
     640             : 
                               // The value is handed to writeEscapeOctet() unconditionally.
     641             :         case EscapeOctet:
     642           8 :             writeEscapeOctet(pResult, &nCapacity, nUtf32);
     643           8 :             break;
     644             :         }
     645             :     }
                           // Turn the accumulated buffer into the final result string.
     646      154705 :     *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
     647             : }
     648             : 
                       // Decodes the escape sequences in pText and stores the outcome in *pResult.
                       //
                       // eMechanism:
                       //   rtl_UriDecodeNone   -- pText is assigned to *pResult unchanged.
                       //   rtl_UriDecodeToIuri -- eCharset is overridden with UTF-8; escaped
                       //                          characters <= 0x7F are written back through
                       //                          writeEscapeOctet() instead of being decoded.
                       //   rtl_UriDecodeStrict -- an EscapeOctet result from readUcs4() resets
                       //                          *pResult with rtl_uString_new() and returns.
                       //   anything else       -- handled by the default branch with the
                       //                          caller's eCharset.
                       // eCharset  text encoding handed to readUcs4().
     649      779910 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
     650             :                             rtl_UriDecodeMechanism eMechanism,
     651             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     652             :     SAL_THROW_EXTERN_C()
     653             : {
     654      779910 :     switch (eMechanism)
     655             :     {
     656             :     case rtl_UriDecodeNone:
     657           0 :         rtl_uString_assign(pResult, pText);
     658           0 :         break;
     659             : 
     660             :     case rtl_UriDecodeToIuri:
     661          10 :         eCharset = RTL_TEXTENCODING_UTF8;
                               // No break: intentionally continues into the shared decoding
                               // loop below.
     662             :     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
     663             :         {
                                   // *pResult is created with the input length as initial
                                   // capacity; nCapacity is passed by address to the write
                                   // helpers.
     664      779910 :             sal_Unicode const * p = pText->buffer;
     665      779910 :             sal_Unicode const * pEnd = p + pText->length;
     666      779910 :             sal_Int32 nCapacity = pText->length;
     667      779910 :             rtl_uString_new_WithLength(pResult, nCapacity);
     668    41762435 :             while (p < pEnd)
     669             :             {
                                       // readUcs4() advances p and classifies the value it
                                       // returns through eType.
     670             :                 EscapeType eType;
     671    40202621 :                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
     672    40202621 :                 switch (eType)
     673             :                 {
     674             :                 case EscapeChar:
     675       40077 :                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
     676             :                     {
     677           2 :                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
     678           2 :                         break;
     679             :                     }
                                           // No break: any other escaped character is written
                                           // exactly like an unescaped one.
     680             :                 case EscapeNo:
     681    40202571 :                     writeUcs4(pResult, &nCapacity, nUtf32);
     682    40202571 :                     break;
     683             : 
                                       // Strict mode gives up with an empty result; the other
                                       // mechanisms pass the value to writeEscapeOctet().
     684             :                 case EscapeOctet:
     685          48 :                     if (eMechanism == rtl_UriDecodeStrict) {
     686           6 :                         rtl_uString_new(pResult);
     687      779910 :                         return;
     688             :                     }
     689          42 :                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
     690          42 :                     break;
     691             :                 }
     692             :             }
                                   // Turn the accumulated buffer into the final result string.
     693      779904 :             *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
     694             :         }
     695      779904 :         break;
     696             :     }
     697             : }
     698             : 
                       // Resolves pRelUriRef against pBaseUriRef.
                       //
                       // Returns true and assigns the resulting URI reference to *pResult on
                       // success.  Returns false and assigns a message to *pException (leaving
                       // *pResult untouched) if the base has no scheme component, or if its path
                       // is non-empty and does not start with '/'.
                       //
                       // If pRelUriRef itself has a scheme component it is returned unchanged and
                       // pBaseUriRef is not inspected at all.
     699       20485 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
     700             :                                          rtl_uString * pRelUriRef,
     701             :                                          rtl_uString ** pResult,
     702             :                                          rtl_uString ** pException)
     703             :     SAL_THROW_EXTERN_C()
     704             : {
     705             :     // If pRelUriRef starts with a scheme component it is an absolute URI
     706             :     // reference, and we are done (i.e., this algorithm does not support
     707             :     // backwards-compatible relative URIs starting with a scheme component, see
     708             :     // RFC 2396, section 5.2, step 3):
     709       20485 :     Components aRelComponents;
     710       20485 :     parseUriRef(pRelUriRef, &aRelComponents);
     711       20485 :     if (aRelComponents.aScheme.isPresent())
     712             :     {
     713        6742 :         rtl_uString_assign(pResult, pRelUriRef);
     714        6742 :         return true;
     715             :     }
     716             : 
     717             :     // Parse pBaseUriRef; if the scheme component is not present or not valid,
     718             :     // or the path component is not empty and starts with anything but a slash,
     719             :     // an exception is raised:
     720       13743 :     Components aBaseComponents;
     721       13743 :     parseUriRef(pBaseUriRef, &aBaseComponents);
     722       13743 :     if (!aBaseComponents.aScheme.isPresent())
     723             :     {
     724           2 :         rtl::OUString aMessage(pBaseUriRef);
     725             :         aMessage += rtl::OUString(
     726             :                         RTL_CONSTASCII_USTRINGPARAM(
     727           2 :                             " does not start with a scheme component"));
     728             :         rtl_uString_assign(pException,
     729           2 :                            aMessage.pData);
     730           2 :         return false;
     731             :     }
     732       13741 :     if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
     733             :         && *aBaseComponents.aPath.pBegin != '/')
     734             :     {
                               // The text is appended directly to the base URI, so it starts
                               // with a space, like the message of the scheme check above.
     735           2 :         rtl::OUString aMessage(pBaseUriRef);
     736             :         aMessage += rtl::OUString(
     737             :                         RTL_CONSTASCII_USTRINGPARAM(
     738           2 :                             " path component does not start with slash"));
     739           2 :         rtl_uString_assign(pException, aMessage.pData);
     740           2 :         return false;
     741             :     }
     742             : 
     743             :     // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
     744             :     // into an absolute one (if the relative URI is a reference to the "current
     745             :     // document," the "current document" is here taken to be the base URI):
     746       13739 :     rtl::OUStringBuffer aBuffer;
     747             :     aBuffer.append(aBaseComponents.aScheme.pBegin,
     748       13739 :                    aBaseComponents.aScheme.getLength());
     749       13739 :     if (aRelComponents.aAuthority.isPresent())
     750             :     {
                               // The relative reference carries its own authority: take
                               // authority, path and query from it; only the scheme comes
                               // from the base.
     751             :         aBuffer.append(aRelComponents.aAuthority.pBegin,
     752           2 :                        aRelComponents.aAuthority.getLength());
     753             :         aBuffer.append(aRelComponents.aPath.pBegin,
     754           2 :                        aRelComponents.aPath.getLength());
     755           2 :         if (aRelComponents.aQuery.isPresent())
     756             :             aBuffer.append(aRelComponents.aQuery.pBegin,
     757           0 :                            aRelComponents.aQuery.getLength());
     758             :     }
     759             :     else
     760             :     {
     761       13737 :         if (aBaseComponents.aAuthority.isPresent())
     762             :             aBuffer.append(aBaseComponents.aAuthority.pBegin,
     763       13737 :                            aBaseComponents.aAuthority.getLength());
     764       13743 :         if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
     765           6 :             && !aRelComponents.aQuery.isPresent())
     766             :         {
                                   // Empty path and no query: keep the base's path and query.
     767             :             aBuffer.append(aBaseComponents.aPath.pBegin,
     768           4 :                            aBaseComponents.aPath.getLength());
     769           4 :             if (aBaseComponents.aQuery.isPresent())
     770             :                 aBuffer.append(aBaseComponents.aQuery.pBegin,
     771           4 :                                aBaseComponents.aQuery.getLength());
     772             :         }
     773             :         else
     774             :         {
                                   // A relative path starting with '/' replaces the base path;
                                   // any other path is merged with it through joinPaths().
                                   // NOTE(review): the path may be empty here when only a
                                   // query is present, so *pBegin then reads the character at
                                   // the path's end -- presumably the start of the query;
                                   // confirm against parseUriRef().
     775       13733 :             if (*aRelComponents.aPath.pBegin == '/')
     776             :                 aBuffer.append(aRelComponents.aPath.pBegin,
     777           6 :                                aRelComponents.aPath.getLength());
     778             :             else
     779             :                 aBuffer.append(joinPaths(aBaseComponents.aPath,
     780       13727 :                                          aRelComponents.aPath));
     781       13733 :             if (aRelComponents.aQuery.isPresent())
     782             :                 aBuffer.append(aRelComponents.aQuery.pBegin,
     783          12 :                                aRelComponents.aQuery.getLength());
     784             :         }
     785             :     }
                           // The fragment is always taken from the relative reference, never
                           // from the base.
     786       13739 :     if (aRelComponents.aFragment.isPresent())
     787             :         aBuffer.append(aRelComponents.aFragment.pBegin,
     788          12 :                        aRelComponents.aFragment.getLength());
     789       13739 :     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
     790       13739 :     return true;
     791             : }
     792             : 
     793             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10