LCOV - libreoffice_filtered.info - /usr/local/src/libreoffice/sal/rtl/uri.cxx

LCOV - code coverage report

Current view:	top level - usr/local/src/libreoffice/sal/rtl - uri.cxx (source / functions)		Hit	Total	Coverage
Test:	libreoffice_filtered.info	Lines:	298	304	98.0 %
Date:	2013-07-09	Functions:	20	20	100.0 %
Legend:	Lines: hit not hit

          Line data    Source code

       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "surrogates.hxx"
      21             : 
      22             : #include "osl/diagnose.h"
      23             : #include "rtl/character.hxx"
      24             : #include "rtl/strbuf.hxx"
      25             : #include "rtl/textenc.h"
      26             : #include "rtl/textcvt.h"
      27             : #include "rtl/uri.h"
      28             : #include "rtl/ustrbuf.h"
      29             : #include "rtl/ustrbuf.hxx"
      30             : #include "rtl/ustring.h"
      31             : #include "rtl/ustring.hxx"
      32             : #include "sal/types.h"
      33             : #include "sal/macros.h"
      34             : 
      35             : #include <cstddef>
      36             : 
      37             : namespace {
      38             : 
      39             : std::size_t const nCharClassSize = 128;
      40             : 
      41             : sal_Unicode const cEscapePrefix = 0x25; // '%'
      42             : 
      43    97834651 : inline bool isHighSurrogate(sal_uInt32 nUtf16)
      44             : {
      45    97834651 :     return SAL_RTL_IS_HIGH_SURROGATE(nUtf16);
      46             : }
      47             : 
      48          22 : inline bool isLowSurrogate(sal_uInt32 nUtf16)
      49             : {
      50          22 :     return SAL_RTL_IS_LOW_SURROGATE(nUtf16);
      51             : }
      52             : 
      53           5 : inline sal_uInt32 combineSurrogates(sal_uInt32 high, sal_uInt32 low)
      54             : {
      55           5 :     return SAL_RTL_COMBINE_SURROGATES(high, low);
      56             : }
      57             : 
      58       43012 : inline int getHexWeight(sal_uInt32 nUtf32)
      59             : {
      60       43011 :     return nUtf32 >= 0x30 && nUtf32 <= 0x39 ? // '0'--'9'
      61       42176 :                static_cast< int >(nUtf32 - 0x30) :
      62         835 :            nUtf32 >= 0x41 && nUtf32 <= 0x46 ? // 'A'--'F'
      63         781 :                static_cast< int >(nUtf32 - 0x41 + 10) :
      64          54 :            nUtf32 >= 0x61 && nUtf32 <= 0x66 ? // 'a'--'f'
      65          54 :                static_cast< int >(nUtf32 - 0x61 + 10) :
      66       86023 :                -1; // not a hex digit
      67             : }
      68             : 
      69    28039236 : inline bool isValid(sal_Bool const * pCharClass, sal_uInt32 nUtf32)
      70             : {
      71    28039236 :     return nUtf32 < nCharClassSize && pCharClass[nUtf32];
      72             : }
      73             : 
      74    97896585 : inline void writeUnicode(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
      75             :                          sal_Unicode cChar)
      76             : {
      77    97896585 :     rtl_uStringbuffer_insert(pBuffer, pCapacity, (*pBuffer)->length, &cChar, 1);
      78    97896585 : }
      79             : 
      80             : enum EscapeType
      81             : {
      82             :     EscapeNo,
      83             :     EscapeChar,
      84             :     EscapeOctet
      85             : };
      86             : 
      87             : /* Read any of the following:
      88             : 
      89             :    - sequence of escape sequences representing character from eCharset,
      90             :      translated to single UCS4 character; or
      91             : 
      92             :    - pair of UTF-16 surrogates, translated to single UCS4 character; or
      93             : 
      94             :    _ single UTF-16 character, extended to UCS4 character.
      95             :  */
      96    97856085 : sal_uInt32 readUcs4(sal_Unicode const ** pBegin, sal_Unicode const * pEnd,
      97             :                     bool bEncoded, rtl_TextEncoding eCharset,
      98             :                     EscapeType * pType)
      99             : {
     100    97856085 :     sal_uInt32 nChar = *(*pBegin)++;
     101             :     int nWeight1;
     102             :     int nWeight2;
     103    97877552 :     if (nChar == cEscapePrefix && bEncoded && pEnd - *pBegin >= 2
     104       21455 :         && (nWeight1 = getHexWeight((*pBegin)[0])) >= 0
     105    97877539 :         && (nWeight2 = getHexWeight((*pBegin)[1])) >= 0)
     106             :     {
     107       21454 :         *pBegin += 2;
     108       21454 :         nChar = static_cast< sal_uInt32 >(nWeight1 << 4 | nWeight2);
     109       21454 :         if (nChar <= 0x7F)
     110       21405 :             *pType = EscapeChar;
     111          49 :         else if (eCharset == RTL_TEXTENCODING_UTF8)
     112             :         {
     113          39 :             if (nChar >= 0xC0 && nChar <= 0xF4)
     114             :             {
     115             :                 sal_uInt32 nEncoded;
     116             :                 int nShift;
     117             :                 sal_uInt32 nMin;
     118          22 :                 if (nChar <= 0xDF)
     119             :                 {
     120           4 :                     nEncoded = (nChar & 0x1F) << 6;
     121           4 :                     nShift = 0;
     122           4 :                     nMin = 0x80;
     123             :                 }
     124          18 :                 else if (nChar <= 0xEF)
     125             :                 {
     126          16 :                     nEncoded = (nChar & 0x0F) << 12;
     127          16 :                     nShift = 6;
     128          16 :                     nMin = 0x800;
     129             :                 }
     130             :                 else
     131             :                 {
     132           2 :                     nEncoded = (nChar & 0x07) << 18;
     133           2 :                     nShift = 12;
     134           2 :                     nMin = 0x10000;
     135             :                 }
     136          22 :                 sal_Unicode const * p = *pBegin;
     137          22 :                 bool bUTF8 = true;
     138          62 :                 for (; nShift >= 0; nShift -= 6)
     139             :                 {
     140         123 :                     if (pEnd - p < 3 || p[0] != cEscapePrefix
     141          41 :                         || (nWeight1 = getHexWeight(p[1])) < 8
     142          41 :                         || nWeight1 > 11
     143          81 :                         || (nWeight2 = getHexWeight(p[2])) < 0)
     144             :                     {
     145           1 :                         bUTF8 = sal_False;
     146           1 :                         break;
     147             :                     }
     148          40 :                     p += 3;
     149          40 :                     nEncoded |= ((nWeight1 & 3) << 4 | nWeight2) << nShift;
     150             :                 }
     151          43 :                 if (bUTF8 && nEncoded >= nMin && !isHighSurrogate(nEncoded)
     152          37 :                     && !isLowSurrogate(nEncoded) && nEncoded <= 0x10FFFF)
     153             :                 {
     154          13 :                     *pBegin = p;
     155          13 :                     *pType = EscapeChar;
     156          13 :                     return nEncoded;
     157             :                 }
     158             :             }
     159          26 :             *pType = EscapeOctet;
     160             :         }
     161             :         else
     162             :         {
     163          10 :             rtl::OStringBuffer aBuf;
     164          10 :             aBuf.append(static_cast< char >(nChar));
     165             :             rtl_TextToUnicodeConverter aConverter
     166          10 :                 = rtl_createTextToUnicodeConverter(eCharset);
     167          10 :             sal_Unicode const * p = *pBegin;
     168             :             for (;;)
     169             :             {
     170             :                 sal_Unicode aDst[2];
     171             :                 sal_uInt32 nInfo;
     172             :                 sal_Size nConverted;
     173             :                 sal_Size nDstSize = rtl_convertTextToUnicode(
     174          24 :                     aConverter, 0, aBuf.getStr(), aBuf.getLength(), aDst,
     175             :                     SAL_N_ELEMENTS( aDst ),
     176             :                     (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_ERROR
     177             :                      | RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_ERROR
     178             :                      | RTL_TEXTTOUNICODE_FLAGS_INVALID_ERROR),
     179          24 :                     &nInfo, &nConverted);
     180          24 :                 if (nInfo == 0)
     181             :                 {
     182             :                     assert( nConverted
     183             :                         == sal::static_int_cast< sal_uInt32 >(
     184             :                             aBuf.getLength()));
     185           8 :                     rtl_destroyTextToUnicodeConverter(aConverter);
     186           8 :                     *pBegin = p;
     187           8 :                     *pType = EscapeChar;
     188             :                     assert( nDstSize == 1
     189             :                         || (nDstSize == 2 && isHighSurrogate(aDst[0])
     190             :                             && isLowSurrogate(aDst[1])));
     191             :                     return nDstSize == 1
     192           8 :                         ? aDst[0] : combineSurrogates(aDst[0], aDst[1]);
     193             :                 }
     194          32 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     195          14 :                          && pEnd - p >= 3 && p[0] == cEscapePrefix
     196          11 :                          && (nWeight1 = getHexWeight(p[1])) >= 0
     197          27 :                          && (nWeight2 = getHexWeight(p[2])) >= 0)
     198             :                 {
     199          11 :                     p += 3;
     200          11 :                     aBuf.append(static_cast< char >(nWeight1 << 4 | nWeight2));
     201             :                 }
     202           5 :                 else if (nInfo == RTL_TEXTTOUNICODE_INFO_SRCBUFFERTOSMALL
     203           3 :                          && p != pEnd && *p <= 0x7F)
     204             :                 {
     205           3 :                     aBuf.append(static_cast< char >(*p++));
     206             :                 }
     207             :                 else
     208             :                 {
     209             :                     assert(
     210             :                         (nInfo & RTL_TEXTTOUNICODE_INFO_DESTBUFFERTOSMALL)
     211             :                         == 0);
     212           2 :                     break;
     213             :                 }
     214          14 :             }
     215           2 :             rtl_destroyTextToUnicodeConverter(aConverter);
     216           2 :             *pType = EscapeOctet;
     217             :         }
     218       21433 :         return nChar;
     219             :     }
     220             :     else
     221             :     {
     222    97834631 :         *pType = EscapeNo;
     223    97834638 :         return isHighSurrogate(nChar) && *pBegin < pEnd
     224           7 :                && isLowSurrogate(**pBegin) ?
     225    97834635 :                    combineSurrogates(nChar, *(*pBegin)++) : nChar;
     226             :     }
     227             : }
     228             : 
     229    69816764 : void writeUcs4(rtl_uString ** pBuffer, sal_Int32 * pCapacity, sal_uInt32 nUtf32)
     230             : {
     231             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     232    69816764 :     if (nUtf32 <= 0xFFFF) {
     233             :         writeUnicode(
     234    69816761 :             pBuffer, pCapacity, static_cast< sal_Unicode >(nUtf32));
     235             :     } else {
     236           3 :         nUtf32 -= 0x10000;
     237             :         writeUnicode(
     238             :             pBuffer, pCapacity,
     239           3 :             static_cast< sal_Unicode >(nUtf32 >> 10 | 0xD800));
     240             :         writeUnicode(
     241             :             pBuffer, pCapacity,
     242           3 :             static_cast< sal_Unicode >((nUtf32 & 0x3FF) | 0xDC00));
     243             :     }
     244    69816764 : }
     245             : 
     246       20235 : void writeEscapeOctet(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     247             :                       sal_uInt32 nOctet)
     248             : {
     249             :     assert(nOctet <= 0xFF); // bad octet
     250             : 
     251             :     static sal_Unicode const aHex[16]
     252             :         = { 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
     253             :             0x41, 0x42, 0x43, 0x44, 0x45, 0x46 }; /* '0'--'9', 'A'--'F' */
     254             : 
     255       20235 :     writeUnicode(pBuffer, pCapacity, cEscapePrefix);
     256       20235 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet >> 4]);
     257       20235 :     writeUnicode(pBuffer, pCapacity, aHex[nOctet & 15]);
     258       20235 : }
     259             : 
     260       20180 : bool writeEscapeChar(rtl_uString ** pBuffer, sal_Int32 * pCapacity,
     261             :                      sal_uInt32 nUtf32, rtl_TextEncoding eCharset, bool bStrict)
     262             : {
     263             :     assert(nUtf32 <= 0x10FFFF); // bad UTF-32 char
     264       20180 :     if (eCharset == RTL_TEXTENCODING_UTF8) {
     265        6080 :         if (nUtf32 < 0x80)
     266        6067 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32);
     267          13 :         else if (nUtf32 < 0x800)
     268             :         {
     269           3 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 6 | 0xC0);
     270           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     271             :         }
     272          10 :         else if (nUtf32 < 0x10000)
     273             :         {
     274           7 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 12 | 0xE0);
     275           7 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     276           7 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     277             :         }
     278             :         else
     279             :         {
     280           3 :             writeEscapeOctet(pBuffer, pCapacity, nUtf32 >> 18 | 0xF0);
     281           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 12 & 0x3F) | 0x80);
     282           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 >> 6 & 0x3F) | 0x80);
     283           3 :             writeEscapeOctet(pBuffer, pCapacity, (nUtf32 & 0x3F) | 0x80);
     284             :         }
     285             :     } else {
     286             :         rtl_UnicodeToTextConverter aConverter
     287       14100 :             = rtl_createUnicodeToTextConverter(eCharset);
     288             :         sal_Unicode aSrc[2];
     289             :         sal_Size nSrcSize;
     290       14100 :         if (nUtf32 <= 0xFFFF)
     291             :         {
     292       14099 :             aSrc[0] = static_cast< sal_Unicode >(nUtf32);
     293       14099 :             nSrcSize = 1;
     294             :         }
     295             :         else
     296             :         {
     297             :             aSrc[0] = static_cast< sal_Unicode >(
     298           1 :                 ((nUtf32 - 0x10000) >> 10) | 0xD800);
     299             :             aSrc[1] = static_cast< sal_Unicode >(
     300           1 :                 ((nUtf32 - 0x10000) & 0x3FF) | 0xDC00);
     301           1 :             nSrcSize = 2;
     302             :         }
     303             :         sal_Char aDst[32]; // FIXME  random value
     304             :         sal_uInt32 nInfo;
     305             :         sal_Size nConverted;
     306             :         sal_Size nDstSize = rtl_convertUnicodeToText(
     307             :             aConverter, 0, aSrc, nSrcSize, aDst, sizeof aDst,
     308             :             RTL_UNICODETOTEXT_FLAGS_UNDEFINED_ERROR
     309             :             | RTL_UNICODETOTEXT_FLAGS_INVALID_ERROR
     310             :             | RTL_UNICODETOTEXT_FLAGS_FLUSH,
     311       14100 :             &nInfo, &nConverted);
     312             :         assert((nInfo & RTL_UNICODETOTEXT_INFO_DESTBUFFERTOSMALL) == 0);
     313       14100 :         rtl_destroyUnicodeToTextConverter(aConverter);
     314       14100 :         if (nInfo == 0) {
     315             :             assert(nConverted == nSrcSize); // bad rtl_convertUnicodeToText
     316       28200 :             for (sal_Size i = 0; i < nDstSize; ++i)
     317             :                 writeEscapeOctet(pBuffer, pCapacity,
     318       14103 :                                  static_cast< unsigned char >(aDst[i]));
     319             :                     // FIXME  all octets are escaped, even if there is no need
     320             :         } else {
     321           3 :             if (bStrict) {
     322           2 :                 return false;
     323             :             } else {
     324           1 :                 writeUcs4(pBuffer, pCapacity, nUtf32);
     325             :             }
     326             :         }
     327             :     }
     328       20178 :     return true;
     329             : }
     330             : 
     331             : struct Component
     332             : {
     333             :     sal_Unicode const * pBegin;
     334             :     sal_Unicode const * pEnd;
     335             : 
     336      339755 :     inline Component(): pBegin(0), pEnd(0) {}
     337             : 
     338      153817 :     inline bool isPresent() const { return pBegin != 0; }
     339             : 
     340             :     inline sal_Int32 getLength() const;
     341             : };
     342             : 
     343       85872 : inline sal_Int32 Component::getLength() const
     344             : {
     345             :     assert(isPresent()); // taking length of non-present component
     346       85872 :     return static_cast< sal_Int32 >(pEnd - pBegin);
     347             : }
     348             : 
     349       67951 : struct Components
     350             : {
     351             :     Component aScheme;
     352             :     Component aAuthority;
     353             :     Component aPath;
     354             :     Component aQuery;
     355             :     Component aFragment;
     356             : };
     357             : 
     358       67951 : void parseUriRef(rtl_uString const * pUriRef, Components * pComponents)
     359             : {
     360             :     // This algorithm is liberal and accepts various forms of illegal input.
     361             : 
     362       67951 :     sal_Unicode const * pBegin = pUriRef->buffer;
     363       67951 :     sal_Unicode const * pEnd = pBegin + pUriRef->length;
     364       67951 :     sal_Unicode const * pPos = pBegin;
     365             : 
     366       67951 :     if (pPos != pEnd && rtl::isAsciiAlpha(*pPos))
     367             :     {
     368      747175 :         for (sal_Unicode const * p = pPos + 1; p != pEnd; ++p)
     369             :         {
     370      736812 :             if (*p == ':')
     371             :             {
     372       46482 :                 pComponents->aScheme.pBegin = pBegin;
     373       46482 :                 pComponents->aScheme.pEnd = ++p;
     374       46482 :                 pPos = p;
     375       46482 :                 break;
     376             :             }
     377     1474520 :             else if (!rtl::isAsciiAlphanumeric(*p) && *p != '+' && *p != '-'
     378      784190 :                      && *p != '.')
     379             :             {
     380       11028 :                 break;
     381             :             }
     382             :         }
     383             :     }
     384             : 
     385       67951 :     if (pEnd - pPos >= 2 && pPos[0] == '/' && pPos[1] == '/')
     386             :     {
     387       22151 :         pComponents->aAuthority.pBegin = pPos;
     388       22151 :         pPos += 2;
     389       44346 :         while (pPos != pEnd && *pPos != '/' && *pPos != '?' && *pPos != '#')
     390          44 :             ++pPos;
     391       22151 :         pComponents->aAuthority.pEnd = pPos;
     392             :     }
     393             : 
     394       67951 :     pComponents->aPath.pBegin = pPos;
     395     2763486 :     while (pPos != pEnd && *pPos != '?' && * pPos != '#')
     396     2627584 :         ++pPos;
     397       67951 :     pComponents->aPath.pEnd = pPos;
     398             : 
     399       67951 :     if (pPos != pEnd && *pPos == '?')
     400             :     {
     401          48 :         pComponents->aQuery.pBegin = pPos++;
     402         153 :         while (pPos != pEnd && * pPos != '#')
     403          57 :             ++pPos;
     404          48 :         pComponents->aQuery.pEnd = pPos;
     405             :     }
     406             : 
     407       67951 :     if (pPos != pEnd)
     408             :     {
     409             :         assert(*pPos == '#');
     410           6 :         pComponents->aFragment.pBegin = pPos;
     411           6 :         pComponents->aFragment.pEnd = pEnd;
     412             :     }
     413       67951 : }
     414             : 
     415       21460 : rtl::OUString joinPaths(Component const & rBasePath, Component const & rRelPath)
     416             : {
     417             :     assert(rBasePath.isPresent() && *rBasePath.pBegin == '/');
     418             :     assert(rRelPath.isPresent());
     419             : 
     420             :     // The invariant of aBuffer is that it always starts and ends with a slash
     421             :     // (until probably right at the end of the algorithm, when the last segment
     422             :     // of rRelPath is added, which does not necessarily end in a slash):
     423       21460 :     rtl::OUStringBuffer aBuffer(rBasePath.getLength() + rRelPath.getLength());
     424             :         // XXX  numeric overflow
     425             : 
     426             :     // Segments "." and ".." within rBasePath are not conisdered special (but
     427             :     // are also not removed by ".." segments within rRelPath), RFC 2396 seems a
     428             :     // bit unclear about this point:
     429       21460 :     sal_Int32 nFixed = 1;
     430       21460 :     sal_Unicode const * p = rBasePath.pBegin + 1;
     431     1562224 :     for (sal_Unicode const * q = p; q != rBasePath.pEnd; ++q)
     432     1540764 :         if (*q == '/')
     433             :         {
     434      170454 :             if (
     435      340979 :                 (q - p == 1 && p[0] == '.') ||
     436      170506 :                 (q - p == 2 && p[0] == '.' && p[1] == '.')
     437             :                )
     438             :             {
     439          54 :                 nFixed = q + 1 - rBasePath.pBegin;
     440             :             }
     441      170454 :             p = q + 1;
     442             :         }
     443       21460 :     aBuffer.append(rBasePath.pBegin, p - rBasePath.pBegin);
     444             : 
     445       21460 :     p = rRelPath.pBegin;
     446       21460 :     if (p != rRelPath.pEnd)
     447             :         for (;;)
     448             :         {
     449       21547 :             sal_Unicode const * q = p;
     450             :             sal_Unicode const * r;
     451             :             for (;;)
     452             :             {
     453      295767 :                 if (q == rRelPath.pEnd)
     454             :                 {
     455       21459 :                     r = q;
     456       21459 :                     break;
     457             :                 }
     458      274308 :                 if (*q == '/')
     459             :                 {
     460          88 :                     r = q + 1;
     461          88 :                     break;
     462             :                 }
     463      274220 :                 ++q;
     464             :             }
     465       21547 :             if (q - p == 2 && p[0] == '.' && p[1] == '.')
     466             :             {
     467             :                 // Erroneous excess segments ".." within rRelPath are left
     468             :                 // intact, as the examples in RFC 2396, section C.2, suggest:
     469          24 :                 sal_Int32 i = aBuffer.getLength() - 1;
     470          24 :                 if (i < nFixed)
     471             :                 {
     472           6 :                     aBuffer.append(p, r - p);
     473           6 :                     nFixed += 3;
     474             :                 }
     475             :                 else
     476             :                 {
     477          58 :                     while (i > 0 && aBuffer[i - 1] != '/')
     478          22 :                         --i;
     479          18 :                     aBuffer.setLength(i);
     480          24 :                 }
     481             :             }
     482       21523 :             else if (q - p != 1 || *p != '.')
     483       21461 :                 aBuffer.append(p, r - p);
     484       21547 :             if (q == rRelPath.pEnd)
     485       21459 :                 break;
     486          88 :             p = q + 1;
     487      274308 :         }
     488             : 
     489       21460 :     return aBuffer.makeStringAndClear();
     490             : }
     491             : 
     492             : }
     493             : 
     494       10443 : sal_Bool const * SAL_CALL rtl_getUriCharClass(rtl_UriCharClass eCharClass)
     495             :     SAL_THROW_EXTERN_C()
     496             : {
     497             :     static sal_Bool const aCharClass[][nCharClassSize]
     498             :     = {{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* None */
     499             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     500             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* !"#$%&'()*+,-./*/
     501             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*0123456789:;<=>?*/
     502             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*@ABCDEFGHIJKLMNO*/
     503             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*PQRSTUVWXYZ[\]^_*/
     504             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /*`abcdefghijklmno*/
     505             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0  /*pqrstuvwxyz{|}~ */
     506             :        },
     507             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Uric */
     508             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     509             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* !"#$%&'()*+,-./*/
     510             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     511             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     512             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     513             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     514             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     515             :        },
     516             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UricNoSlash */
     517             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     518             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     519             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, /*0123456789:;<=>?*/
     520             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     521             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     522             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     523             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     524             :        },
     525             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RelSegment */
     526             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     527             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     528             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     529             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     530             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     531             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     532             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     533             :        },
     534             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* RegName */
     535             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     536             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     537             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     538             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     539             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     540             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     541             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     542             :        },
     543             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Userinfo */
     544             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     545             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     546             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 0, /*0123456789:;<=>?*/
     547             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     548             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     549             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     550             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     551             :        },
     552             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* Pchar */
     553             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     554             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, /* !"#$%&'()*+,-./*/
     555             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 0, /*0123456789:;<=>?*/
     556             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     557             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     558             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     559             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     560             :        },
     561             :        { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UnoParamValue */
     562             :          0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
     563             :          0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, /* !"#$%&'()*+,-./*/
     564             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*0123456789:;<=>?*/
     565             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*@ABCDEFGHIJKLMNO*/
     566             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /*PQRSTUVWXYZ[\]^_*/
     567             :          0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /*`abcdefghijklmno*/
     568             :          1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0  /*pqrstuvwxyz{|}~ */
     569             :        }};
     570             :     assert(
     571             :         (eCharClass >= 0
     572             :          && (sal::static_int_cast< std::size_t >(eCharClass)
     573             :              < SAL_N_ELEMENTS(aCharClass)))); // bad eCharClass
     574       10443 :     return aCharClass[eCharClass];
     575             : }
     576             : 
     577      341377 : void SAL_CALL rtl_uriEncode(rtl_uString * pText, sal_Bool const * pCharClass,
     578             :                             rtl_UriEncodeMechanism eMechanism,
     579             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     580             :     SAL_THROW_EXTERN_C()
     581             : {
                           // Builds an encoded copy of pText in *pResult.  Each code point read
                           // from pText is written unchanged when isValid(pCharClass, ...) accepts
                           // it, and is otherwise emitted through writeEscapeChar using eCharset.
                           // If writeEscapeChar reports failure, *pResult is reset with
                           // rtl_uString_new and the function returns early.
     582             :     assert(!pCharClass[0x25]); // make sure the percent sign is encoded...
     583             : 
     584      341377 :     sal_Unicode const * p = pText->buffer;
     585      341377 :     sal_Unicode const * pEnd = p + pText->length;
                           // Start with a result buffer sized to the input length; the write*
                           // helpers receive &nCapacity (presumably so they can grow the buffer --
                           // confirm against their definitions).
     586      341377 :     sal_Int32 nCapacity = pText->length;
     587      341377 :     rtl_uString_new_WithLength(pResult, nCapacity);
     588      341377 :     while (p < pEnd)
     589             :     {
     590             :         EscapeType eType;
                               // The third argument is true only for the three mechanisms named
                               // below; presumably it makes readUcs4 recognize escape sequences
                               // already present in the input (confirm against readUcs4).
     591             :         sal_uInt32 nUtf32 = readUcs4(
     592             :             &p, pEnd,
     593             :             (eMechanism == rtl_UriEncodeKeepEscapes
     594    28001275 :              || eMechanism == rtl_UriEncodeCheckEscapes
     595    56011888 :              || eMechanism == rtl_UriEncodeStrictKeepEscapes),
     596    56078594 :             eCharset, &eType);
     597    28039297 :         switch (eType)
     598             :         {
     599             :         case EscapeNo:
                                   // Input that readUcs4 did not classify as an escape: copy it
                                   // when the character class allows it, escape it otherwise.
     600    28039233 :             if (isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     601             :                 writeUnicode(pResult, &nCapacity,
     602    28019113 :                              static_cast< sal_Unicode >(nUtf32));
                                   // The last argument is true for the two "Strict" mechanisms.
     603       20120 :             else if (!writeEscapeChar(
     604             :                          pResult, &nCapacity, nUtf32, eCharset,
     605             :                          (eMechanism == rtl_UriEncodeStrict
     606       20120 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     607             :             {
     608           2 :                 rtl_uString_new(pResult);
     609           2 :                 return;
     610             :             }
     611    28039231 :             break;
     612             : 
     613             :         case EscapeChar:
                                   // Input classified as EscapeChar by readUcs4: only
                                   // rtl_UriEncodeCheckEscapes writes it unescaped (and only when
                                   // the character class allows it); every other case goes through
                                   // writeEscapeChar, with the same failure handling as above.
     614          60 :             if (eMechanism == rtl_UriEncodeCheckEscapes
     615          60 :                 && isValid(pCharClass, nUtf32)) // implies nUtf32 <= 0x7F
     616             :                 writeUnicode(pResult, &nCapacity,
     617           0 :                              static_cast< sal_Unicode >(nUtf32));
     618          60 :             else if (!writeEscapeChar(
     619             :                          pResult, &nCapacity, nUtf32, eCharset,
     620             :                          (eMechanism == rtl_UriEncodeStrict
     621          60 :                           || eMechanism == rtl_UriEncodeStrictKeepEscapes)))
     622             :             {
     623           0 :                 rtl_uString_new(pResult);
     624           0 :                 return;
     625             :             }
     626          60 :             break;
     627             : 
     628             :         case EscapeOctet:
                                   // Input classified as EscapeOctet is written through
                                   // writeEscapeOctet regardless of eMechanism.
     629           4 :             writeEscapeOctet(pResult, &nCapacity, nUtf32);
     630           4 :             break;
     631             :         }
     632             :     }
                           // Hand the accumulated buffer over as the final result string.
     633      341375 :     *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
     634             : }
     635             : 
     636      867517 : void SAL_CALL rtl_uriDecode(rtl_uString * pText,
     637             :                             rtl_UriDecodeMechanism eMechanism,
     638             :                             rtl_TextEncoding eCharset, rtl_uString ** pResult)
     639             :     SAL_THROW_EXTERN_C()
     640             : {
                           // Builds a decoded copy of pText in *pResult according to eMechanism.
                           // rtl_UriDecodeNone assigns pText to *pResult unchanged; every other
                           // mechanism walks the input with readUcs4 and rewrites it piece by
                           // piece.  Under rtl_UriDecodeStrict, input that readUcs4 classifies as
                           // EscapeOctet resets *pResult with rtl_uString_new and ends the call.
     641      867517 :     switch (eMechanism)
     642             :     {
     643             :     case rtl_UriDecodeNone:
     644           0 :         rtl_uString_assign(pResult, pText);
     645           0 :         break;
     646             : 
     647             :     case rtl_UriDecodeToIuri:
                               // The caller-supplied eCharset is overridden with UTF-8 here;
                               // control then deliberately falls through into the default case.
     648           5 :         eCharset = RTL_TEXTENCODING_UTF8;
     649             :     default: // rtl_UriDecodeWithCharset, rtl_UriDecodeStrict
     650             :         {
     651      867517 :             sal_Unicode const * p = pText->buffer;
     652      867517 :             sal_Unicode const * pEnd = p + pText->length;
                                   // Start with a result buffer sized to the input length.
     653      867517 :             sal_Int32 nCapacity = pText->length;
     654      867517 :             rtl_uString_new_WithLength(pResult, nCapacity);
     655    71551819 :             while (p < pEnd)
     656             :             {
     657             :                 EscapeType eType;
     658    69816788 :                 sal_uInt32 nUtf32 = readUcs4(&p, pEnd, true, eCharset, &eType);
     659    69816788 :                 switch (eType)
     660             :                 {
     661             :                 case EscapeChar:
                                           // For rtl_UriDecodeToIuri, values up to 0x7F are written
                                           // back through writeEscapeOctet instead of being decoded.
     662       21366 :                     if (nUtf32 <= 0x7F && eMechanism == rtl_UriDecodeToIuri)
     663             :                     {
     664           1 :                         writeEscapeOctet(pResult, &nCapacity, nUtf32);
     665           1 :                         break;
     666             :                     }
                                           // Otherwise deliberately fall through to EscapeNo, so the
                                           // value is written with writeUcs4.
     667             :                 case EscapeNo:
     668    69816763 :                     writeUcs4(pResult, &nCapacity, nUtf32);
     669    69816763 :                     break;
     670             : 
     671             :                 case EscapeOctet:
                                           // Strict decoding gives up on such input; the other
                                           // mechanisms write it back through writeEscapeOctet.
     672          24 :                     if (eMechanism == rtl_UriDecodeStrict) {
     673           3 :                         rtl_uString_new(pResult);
     674      867520 :                         return;
     675             :                     }
     676          21 :                     writeEscapeOctet(pResult, &nCapacity, nUtf32);
     677          21 :                     break;
     678             :                 }
     679             :             }
                                   // Hand the accumulated buffer over as the final result string.
     680      867514 :             *pResult = rtl_uStringBuffer_makeStringAndClear( pResult, &nCapacity );
     681             :         }
     682      867514 :         break;
     683             :     }
     684             : }
     685             : 
     686       46483 : sal_Bool SAL_CALL rtl_uriConvertRelToAbs(rtl_uString * pBaseUriRef,
     687             :                                          rtl_uString * pRelUriRef,
     688             :                                          rtl_uString ** pResult,
     689             :                                          rtl_uString ** pException)
     690             :     SAL_THROW_EXTERN_C()
     691             : {
                           // Resolves pRelUriRef against pBaseUriRef.  On success the resulting
                           // URI reference is assigned to *pResult and true is returned.  If the
                           // base URI reference is unusable (no scheme component, or a non-empty
                           // path that does not start with '/'), a message consisting of the base
                           // URI text followed by a description is assigned to *pException and
                           // false is returned.
                           //
     692             :     // If pRelUriRef starts with a scheme component it is an absolute URI
     693             :     // reference, and we are done (i.e., this algorithm does not support
     694             :     // backwards-compatible relative URIs starting with a scheme component, see
     695             :     // RFC 2396, section 5.2, step 3):
     696       46483 :     Components aRelComponents;
     697       46483 :     parseUriRef(pRelUriRef, &aRelComponents);
     698       46483 :     if (aRelComponents.aScheme.isPresent())
     699             :     {
     700       25015 :         rtl_uString_assign(pResult, pRelUriRef);
     701       25015 :         return true;
     702             :     }
     703             : 
     704             :     // Parse pBaseUriRef; if the scheme component is not present or not valid,
     705             :     // or the path component is not empty and starts with anything but a slash,
     706             :     // an exception is raised:
     707       21468 :     Components aBaseComponents;
     708       21468 :     parseUriRef(pBaseUriRef, &aBaseComponents);
     709       21468 :     if (!aBaseComponents.aScheme.isPresent())
     710             :     {
     711           1 :         rtl::OUString aMessage(pBaseUriRef);
     712           2 :         aMessage += rtl::OUString(
     713           1 :                             " does not start with a scheme component");
     714           1 :         rtl_uString_assign(pException, aMessage.pData);
     716           1 :         return false;
     717             :     }
     718       21467 :     if (aBaseComponents.aPath.pBegin != aBaseComponents.aPath.pEnd
     719       21467 :         && *aBaseComponents.aPath.pBegin != '/')
     720             :     {
     721           1 :         rtl::OUString aMessage(pBaseUriRef);
                               // The leading space keeps the description from running into the
                               // base URI text that precedes it in the message.
     722           2 :         aMessage += rtl::OUString(
     723           1 :                             " path component does not start with slash");
     724           1 :         rtl_uString_assign(pException, aMessage.pData);
     725           1 :         return false;
     726             :     }
     727             : 
     728             :     // Use the algorithm from RFC 2396, section 5.2, to turn the relative URI
     729             :     // into an absolute one (if the relative URI is a reference to the "current
     730             :     // document," the "current document" is here taken to be the base URI):
                           // The components are appended verbatim; no separators are added here.
     731       21466 :     rtl::OUStringBuffer aBuffer;
     732             :     aBuffer.append(aBaseComponents.aScheme.pBegin,
     733       21466 :                    aBaseComponents.aScheme.getLength());
     734       21466 :     if (aRelComponents.aAuthority.isPresent())
     735             :     {
                               // The relative reference carries its own authority: take its
                               // authority, path and query and ignore those of the base.
     736             :         aBuffer.append(aRelComponents.aAuthority.pBegin,
     737           1 :                        aRelComponents.aAuthority.getLength());
     738             :         aBuffer.append(aRelComponents.aPath.pBegin,
     739           1 :                        aRelComponents.aPath.getLength());
     740           1 :         if (aRelComponents.aQuery.isPresent())
     741             :             aBuffer.append(aRelComponents.aQuery.pBegin,
     742           0 :                            aRelComponents.aQuery.getLength());
     743             :     }
     744             :     else
     745             :     {
     746       21465 :         if (aBaseComponents.aAuthority.isPresent())
     747             :             aBuffer.append(aBaseComponents.aAuthority.pBegin,
     748       21465 :                            aBaseComponents.aAuthority.getLength());
     749       42930 :         if (aRelComponents.aPath.pBegin == aRelComponents.aPath.pEnd
     750       21465 :             && !aRelComponents.aQuery.isPresent())
     751             :         {
                                   // Empty relative path and no query: reuse the base's path and
                                   // query unchanged.
     752             :             aBuffer.append(aBaseComponents.aPath.pBegin,
     753           2 :                            aBaseComponents.aPath.getLength());
     754           2 :             if (aBaseComponents.aQuery.isPresent())
     755             :                 aBuffer.append(aBaseComponents.aQuery.pBegin,
     756           2 :                                aBaseComponents.aQuery.getLength());
     757             :         }
     758             :         else
     759             :         {
                                   // NOTE(review): the relative path can be empty here when only a
                                   // query is present; the dereference then reads the character at
                                   // the path's end position -- presumably the start of the query;
                                   // confirm against parseUriRef.
     760       21463 :             if (*aRelComponents.aPath.pBegin == '/')
     761             :                 aBuffer.append(aRelComponents.aPath.pBegin,
     762           3 :                                aRelComponents.aPath.getLength());
     763             :             else
     764             :                 aBuffer.append(joinPaths(aBaseComponents.aPath,
     765       21460 :                                          aRelComponents.aPath));
     766       21463 :             if (aRelComponents.aQuery.isPresent())
     767             :                 aBuffer.append(aRelComponents.aQuery.pBegin,
     768           6 :                                aRelComponents.aQuery.getLength());
     769             :         }
     770             :     }
                           // Only the relative reference's fragment is ever appended; the base's
                           // fragment is not used.
     771       21466 :     if (aRelComponents.aFragment.isPresent())
     772             :         aBuffer.append(aRelComponents.aFragment.pBegin,
     773           6 :                        aRelComponents.aFragment.getLength());
     774       21466 :     rtl_uString_assign(pResult, aBuffer.makeStringAndClear().pData);
     775       21466 :     return true;
     776             : }
     777             : 
     778             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10