LCOV - code coverage report
Current view: top level - oox/source/vml - vmlinputstream.cxx (source / functions) Hit Total Coverage
Test: commit c8344322a7af75b84dd3ca8f78b05543a976dfd5 Lines: 133 154 86.4 %
Date: 2015-06-13 12:38:46 Functions: 18 20 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "oox/vml/vmlinputstream.hxx"
      21             : 
      22             : #include <com/sun/star/io/XTextInputStream2.hpp>
      23             : #include <map>
      24             : #include <string.h>
      25             : #include <rtl/strbuf.hxx>
      26             : #include <osl/diagnose.h>
      27             : #include "oox/helper/helper.hxx"
      28             : #include "oox/helper/textinputstream.hxx"
      29             : 
      30             : namespace oox {
      31             : namespace vml {
      32             : 
      33             : using namespace ::com::sun::star::io;
      34             : using namespace ::com::sun::star::uno;
      35             : 
      36             : namespace {
      37             : 
      38          36 : inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar )
      39             : {
      40          36 :     sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar );
      41          36 :     return (nIndex < 0) ? pcEnd : (pcBeg + nIndex);
      42             : }
      43             : 
      44        4014 : inline bool lclIsWhiteSpace( sal_Char cChar )
      45             : {
      46        4014 :     return cChar < 32;
      47             : }
      48             : 
      49         194 : const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      50             : {
      51        3831 :     for( ; pcBeg < pcEnd; ++pcBeg )
      52        3652 :         if( lclIsWhiteSpace( *pcBeg ) )
      53          15 :             return pcBeg;
      54         179 :     return pcEnd;
      55             : }
      56             : 
      57         364 : const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      58             : {
      59         567 :     for( ; pcBeg < pcEnd; ++pcBeg )
      60         336 :         if( !lclIsWhiteSpace( *pcBeg ) )
      61         133 :             return pcBeg;
      62         231 :     return pcEnd;
      63             : }
      64             : 
      65          17 : const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd )
      66             : {
      67          34 :     while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) )
      68           0 :         --pcEnd;
      69          17 :     return pcEnd;
      70             : }
      71             : 
      72         200 : inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      73             : {
      74         200 :     rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) );
      75         200 : }
      76             : 
      77          14 : void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      78             : {
      79             :     /*  Map attribute names to char-pointer of all attributes. This map is used
      80             :         to find multiple occurrences of attributes with the same name. The
      81             :         mapped pointers are used as map key in the next map below. */
      82             :     typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
      83          14 :     AttributeNameMap aAttributeNames;
      84             : 
      85             :     /*  Map the char-pointers of all attributes to the full attribute definition
      86             :         string. This preserves the original order of the used attributes. */
      87             :     typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
      88          28 :     AttributeDataMap aAttributes;
      89             : 
      90          14 :     bool bOk = true;
      91          14 :     const sal_Char* pcNameBeg = pcBeg;
      92          47 :     while( bOk && (pcNameBeg < pcEnd) )
      93             :     {
      94             :         // pcNameBeg points to begin of attribute name, find equality sign
      95          19 :         const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
      96          19 :         if ((bOk = (pcEqualSign < pcEnd)))
      97             :         {
      98             :             // find end of attribute name (ignore whitespace between name and equality sign)
      99          17 :             const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
     100          17 :             if( (bOk = (pcNameBeg < pcNameEnd)) )
     101             :             {
     102             :                 // find begin of attribute value (must be single or double quote)
     103          17 :                 const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
     104          17 :                 if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) )
     105             :                 {
     106             :                     // find end of attribute value (matching quote character)
     107          17 :                     const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
     108          17 :                     if( (bOk = (pcValueEnd < pcEnd)) )
     109             :                     {
     110          17 :                         ++pcValueEnd;
     111          17 :                         OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
     112          34 :                         OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) );
     113             :                         // search for an existing attribute with the same name
     114          17 :                         AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
     115             :                         // remove its definition from the data map
     116          17 :                         if( aIt != aAttributeNames.end() )
     117           0 :                             aAttributes.erase( aIt->second );
     118             :                         // insert the attribute into both maps
     119          17 :                         aAttributeNames[ aAttribName ] = pcNameBeg;
     120          17 :                         aAttributes[ pcNameBeg ] = aAttribData;
     121             :                         // continue with next attribute (skip whitespace after this attribute)
     122          17 :                         pcNameBeg = pcValueEnd;
     123          17 :                         if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg ))) )
     124          22 :                             pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
     125             :                     }
     126             :                 }
     127             :             }
     128             :         }
     129             :     }
     130             : 
     131             :     // if no error has occurred, build the resulting attribute list
     132          14 :     if( bOk )
     133          21 :         for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
     134          13 :             rBuffer.append( ' ' ).append( aIt->second );
     135             :     // on error, just append the complete passed string
     136             :     else
     137          20 :         lclAppendToBuffer( rBuffer, pcBeg, pcEnd );
     138          14 : }
     139             : 
     140         143 : void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement )
     141             : {
     142             :     // check that passed string starts and ends with the brackets of an XML element
     143         143 :     sal_Int32 nElementLen = rElement.getLength();
     144         143 :     if( nElementLen == 0 )
     145         143 :         return;
     146             : 
     147         143 :     const sal_Char* pcOpen = rElement.getStr();
     148         143 :     const sal_Char* pcClose = pcOpen + nElementLen - 1;
     149             : 
     150             :     // no complete element found
     151         143 :     if( (pcOpen >= pcClose) || (*pcOpen != '<') || (*pcClose != '>') )
     152             :     {
     153             :         // just append all passed characters
     154           0 :         rBuffer.append( rElement );
     155             :     }
     156             : 
     157             :     // skip parser instructions: '<![...]>'
     158         143 :     else if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') )
     159             :     {
     160             :         // do nothing
     161             :     }
     162             : 
     163             :     // replace '<br>' element with newline
     164         143 :     else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) )
     165             :     {
     166           0 :         rBuffer.append( '\n' );
     167             :     }
     168             : 
     169             :     // check start elements and simple elements for repeated attributes
     170         143 :     else if( pcOpen[ 1 ] != '/' )
     171             :     {
     172             :         // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
     173          97 :         const sal_Char* pcContentBeg = pcOpen + 1;
     174          97 :         bool bIsEmptyElement = pcClose[ -1 ] == '/';
     175          97 :         const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose;
     176             :         // append opening bracket and element name to buffer
     177          97 :         const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd );
     178          97 :         lclAppendToBuffer( rBuffer, pcOpen, pcWhiteSpace );
     179             :         // find begin of attributes, and process all attributes
     180          97 :         const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd );
     181          97 :         if( pcAttribBeg < pcContentEnd )
     182          14 :             lclProcessAttribs( rBuffer, pcAttribBeg, pcContentEnd );
     183             :         // close the element
     184          97 :         if( bIsEmptyElement )
     185          51 :             rBuffer.append( '/' );
     186          97 :         rBuffer.append( '>' );
     187             :     }
     188             : 
     189             :     // append end elements without further processing
     190             :     else
     191             :     {
     192          46 :         rBuffer.append( rElement );
     193             :     }
     194             : }
     195             : 
     196         148 : bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars )
     197             : {
     198             :     /*  MSO has a very weird way to store and handle whitespaces. The stream
     199             :         may contain lots of spaces, tabs, and newlines which have to be handled
     200             :         as single space character. This will be done in this function.
     201             : 
     202             :         If the element text contains a literal line break, it will be stored as
     203             :         <br> tag (without matching </br> element). This input stream wrapper
     204             :         will replace this element with a literal LF character (see below).
     205             : 
     206             :         A single space character on its own is stored as is. Example: The
     207             :         element
     208             :             <font> </font>
     209             :         represents a single space character. The XML parser will ignore this
     210             :         space character completely without issuing a 'characters' event. The
     211             :         VML import filter implementation has to react to this case manually.
     212             : 
     213             :         A single space character following another character is stored
     214             :         literally and must not be stripped away here. Example: The element
     215             :             <font>abc </font>
     216             :         contains the three letters a, b, and c, followed by a space character.
     217             : 
     218             :         Consecutive space characters, or a leading single space character, are
     219             :         stored in a <span> element. If there are N space characters (N > 1),
     220             :         then the <span> element contains exactly (N-1) NBSP (non-breaking
     221             :         space) characters, followed by a regular space character. Examples:
     222             :         The element
     223             :             <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
     224             :         represents 4 consecutive space characters. Has to be handled by the
     225             :         implementation. The element
     226             :             <font><span style='mso-spacerun:yes'> abc</span></font>
     227             :         represents a space character followed by the letters a, b, c. These
     228             :         strings have to be handled by the VML import filter implementation.
     229             :      */
     230             : 
     231             :     // passed string ends with the leading opening bracket of an XML element
     232         148 :     const sal_Char* pcBeg = rChars.getStr();
     233         148 :     const sal_Char* pcEnd = pcBeg + rChars.getLength();
     234         148 :     bool bHasBracket = (pcBeg < pcEnd) && (pcEnd[ -1 ] == '<');
     235         148 :     if( bHasBracket ) --pcEnd;
     236             : 
     237             :     // skip leading whitespace
     238         148 :     const sal_Char* pcContentsBeg = lclFindNonWhiteSpace( pcBeg, pcEnd );
     239         393 :     while( pcContentsBeg < pcEnd )
     240             :     {
     241          97 :         const sal_Char* pcWhitespaceBeg = lclFindWhiteSpace( pcContentsBeg + 1, pcEnd );
     242          97 :         lclAppendToBuffer( rBuffer, pcContentsBeg, pcWhitespaceBeg );
     243          97 :         if( pcWhitespaceBeg < pcEnd )
     244           1 :             rBuffer.append( ' ' );
     245          97 :         pcContentsBeg = lclFindNonWhiteSpace( pcWhitespaceBeg, pcEnd );
     246             :     }
     247             : 
     248         148 :     return bHasBracket;
     249             : }
     250             : 
     251             : } // namespace
     252             : 
     253           5 : InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) :
     254             :     // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
     255             :     mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
     256             :     maOpeningBracket( 1 ),
     257             :     maClosingBracket( 1 ),
     258             :     maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
     259             :     maClosingCData( CREATE_OSTRING( "]]>" ) ),
     260           5 :     mnBufferPos( 0 )
     261             : {
     262           5 :     if (!mxTextStrm.is())
     263           0 :         throw IOException();
     264           5 :     maOpeningBracket[ 0 ] = '<';
     265           5 :     maClosingBracket[ 0 ] = '>';
     266           5 : }
     267             : 
     268          10 : InputStream::~InputStream()
     269             : {
     270          10 : }
     271             : 
     272          10 : sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead )
     273             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     274             : {
     275          10 :     if( nBytesToRead < 0 )
     276           0 :         throw IOException();
     277             : 
     278          10 :     rData.realloc( nBytesToRead );
     279          10 :     sal_Int8* pcDest = rData.getArray();
     280          10 :     sal_Int32 nRet = 0;
     281         168 :     while( (nBytesToRead > 0) && !mxTextStrm->isEOF() )
     282             :     {
     283         148 :         updateBuffer();
     284         148 :         sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
     285         148 :         if( nReadSize > 0 )
     286             :         {
     287         143 :             memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
     288         143 :             mnBufferPos += nReadSize;
     289         143 :             nBytesToRead -= nReadSize;
     290         143 :             nRet += nReadSize;
     291             :         }
     292             :     }
     293          10 :     if( nRet < rData.getLength() )
     294          10 :         rData.realloc( nRet );
     295          10 :     return nRet;
     296             : }
     297             : 
     298          10 : sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead )
     299             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     300             : {
     301          10 :     return readBytes( rData, nMaxBytesToRead );
     302             : }
     303             : 
     304           0 : void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip )
     305             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     306             : {
     307           0 :     if( nBytesToSkip < 0 )
     308           0 :         throw IOException();
     309             : 
     310           0 :     while( (nBytesToSkip > 0) && !mxTextStrm->isEOF() )
     311             :     {
     312           0 :         updateBuffer();
     313           0 :         sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
     314           0 :         mnBufferPos += nSkipSize;
     315           0 :         nBytesToSkip -= nSkipSize;
     316             :     }
     317           0 : }
     318             : 
     319           5 : sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException, std::exception)
     320             : {
     321           5 :     updateBuffer();
     322           5 :     return maBuffer.getLength() - mnBufferPos;
     323             : }
     324             : 
     325           0 : void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException, std::exception)
     326             : {
     327           0 :     mxTextStrm->closeInput();
     328           0 : }
     329             : 
     330             : // private --------------------------------------------------------------------
     331             : 
     332         153 : void InputStream::updateBuffer() throw (IOException, RuntimeException)
     333             : {
     334         454 :     while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() )
     335             :     {
     336             :         // collect new contents in a string buffer
     337         148 :         OStringBuffer aBuffer;
     338             : 
     339             :         // read and process characters until the opening bracket of the next XML element
     340         296 :         OString aChars = readToElementBegin();
     341         148 :         bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars );
     342             : 
     343             :         // read and process characters until (and including) closing bracket (an XML element)
     344             :         OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
     345         148 :         if( bHasOpeningBracket && !mxTextStrm->isEOF() )
     346             :         {
     347             :             // read the element text (add the leading opening bracket manually)
     348         143 :             OString aElement = OString( '<' ) + readToElementEnd();
     349             :             // check for CDATA part, starting with '<![CDATA['
     350         143 :             if( aElement.match( maOpeningCData ) )
     351             :             {
     352             :                 // search the end tag ']]>'
     353           0 :                 while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.endsWith( maClosingCData )) && !mxTextStrm->isEOF() )
     354           0 :                     aElement += readToElementEnd();
     355             :                 // copy the entire CDATA part
     356           0 :                 aBuffer.append( aElement );
     357             :             }
     358             :             else
     359             :             {
     360             :                 // no CDATA part - process the contents of the element
     361         143 :                 lclProcessElement( aBuffer, aElement );
     362         143 :             }
     363             :         }
     364             : 
     365         148 :         maBuffer = aBuffer.makeStringAndClear();
     366         148 :         mnBufferPos = 0;
     367         148 :     }
     368         153 : }
     369             : 
     370         148 : OString InputStream::readToElementBegin() throw (IOException, RuntimeException)
     371             : {
     372         148 :     return OUStringToOString( mxTextStrm->readString( maOpeningBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     373             : }
     374             : 
     375         143 : OString InputStream::readToElementEnd() throw (IOException, RuntimeException)
     376             : {
     377         143 :     OString aText = OUStringToOString( mxTextStrm->readString( maClosingBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     378             :     OSL_ENSURE( aText.endsWith(">"), "InputStream::readToElementEnd - missing closing bracket of XML element" );
     379         143 :     return aText;
     380             : }
     381             : 
     382             : } // namespace vml
     383             : } // namespace oox
     384             : 
     385             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.11