LCOV - code coverage report
Current view: top level - oox/source/vml - vmlinputstream.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 133 154 86.4 %
Date: 2014-11-03 Functions: 18 20 90.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "oox/vml/vmlinputstream.hxx"
      21             : 
      22             : #include <com/sun/star/io/XTextInputStream2.hpp>
      23             : #include <map>
      24             : #include <string.h>
      25             : #include <rtl/strbuf.hxx>
      26             : #include "oox/helper/helper.hxx"
      27             : #include "oox/helper/textinputstream.hxx"
      28             : 
      29             : namespace oox {
      30             : namespace vml {
      31             : 
      32             : using namespace ::com::sun::star::io;
      33             : using namespace ::com::sun::star::uno;
      34             : 
      35             : namespace {
      36             : 
      37          52 : inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar )
      38             : {
      39          52 :     sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar );
      40          52 :     return (nIndex < 0) ? pcEnd : (pcBeg + nIndex);
      41             : }
      42             : 
      43        6414 : inline bool lclIsWhiteSpace( sal_Char cChar )
      44             : {
      45        6414 :     return cChar < 32;
      46             : }
      47             : 
      48         280 : const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      49             : {
      50        6176 :     for( ; pcBeg < pcEnd; ++pcBeg )
      51        5918 :         if( lclIsWhiteSpace( *pcBeg ) )
      52          22 :             return pcBeg;
      53         258 :     return pcEnd;
      54             : }
      55             : 
      56         526 : const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      57             : {
      58         802 :     for( ; pcBeg < pcEnd; ++pcBeg )
      59         460 :         if( !lclIsWhiteSpace( *pcBeg ) )
      60         184 :             return pcBeg;
      61         342 :     return pcEnd;
      62             : }
      63             : 
      64          24 : const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd )
      65             : {
      66          48 :     while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) )
      67           0 :         --pcEnd;
      68          24 :     return pcEnd;
      69             : }
      70             : 
      71         288 : inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      72             : {
      73         288 :     rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) );
      74         288 : }
      75             : 
      76          20 : void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      77             : {
      78             :     /*  Map attribute names to char-pointer of all attributes. This map is used
      79             :         to find multiple occurrences of attributes with the same name. The
      80             :         mapped pointers are used as map key in the next map below. */
      81             :     typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
      82          20 :     AttributeNameMap aAttributeNames;
      83             : 
      84             :     /*  Map the char-pointers of all attributes to the full attribute definition
      85             :         string. This preserves the original order of the used attributes. */
      86             :     typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
      87          40 :     AttributeDataMap aAttributes;
      88             : 
      89          20 :     bool bOk = true;
      90          20 :     const sal_Char* pcNameBeg = pcBeg;
      91          68 :     while( bOk && (pcNameBeg < pcEnd) )
      92             :     {
      93             :         // pcNameBeg points to begin of attribute name, find equality sign
      94          28 :         const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
      95          28 :         if ((bOk = (pcEqualSign < pcEnd)) == true)
      96             :         {
      97             :             // find end of attribute name (ignore whitespace between name and equality sign)
      98          24 :             const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
      99          24 :             if( (bOk = (pcNameBeg < pcNameEnd)) == true )
     100             :             {
     101             :                 // find begin of attribute value (must be single or double quote)
     102          24 :                 const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
     103          24 :                 if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) == true )
     104             :                 {
     105             :                     // find end of attribute value (matching quote character)
     106          24 :                     const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
     107          24 :                     if( (bOk = (pcValueEnd < pcEnd)) == true )
     108             :                     {
     109          24 :                         ++pcValueEnd;
     110          24 :                         OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
     111          48 :                         OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) );
     112             :                         // search for an existing attribute with the same name
     113          24 :                         AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
     114             :                         // remove its definition from the data map
     115          24 :                         if( aIt != aAttributeNames.end() )
     116           0 :                             aAttributes.erase( aIt->second );
     117             :                         // insert the attribute into both maps
     118          24 :                         aAttributeNames[ aAttribName ] = pcNameBeg;
     119          24 :                         aAttributes[ pcNameBeg ] = aAttribData;
     120             :                         // continue with next attribute (skip whitespace after this attribute)
     121          24 :                         pcNameBeg = pcValueEnd;
     122          24 :                         if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg )) == true) )
     123          32 :                             pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
     124             :                     }
     125             :                 }
     126             :             }
     127             :         }
     128             :     }
     129             : 
     130             :     // if no error has occurred, build the resulting attribute list
     131          20 :     if( bOk )
     132          32 :         for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
     133          20 :             rBuffer.append( ' ' ).append( aIt->second );
     134             :     // on error, just append the complete passed string
     135             :     else
     136          28 :         lclAppendToBuffer( rBuffer, pcBeg, pcEnd );
     137          20 : }
     138             : 
     139         214 : void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement )
     140             : {
     141             :     // check that passed string starts and ends with the brackets of an XML element
     142         214 :     sal_Int32 nElementLen = rElement.getLength();
     143         214 :     if( nElementLen == 0 )
     144         214 :         return;
     145             : 
     146         214 :     const sal_Char* pcOpen = rElement.getStr();
     147         214 :     const sal_Char* pcClose = pcOpen + nElementLen - 1;
     148             : 
     149             :     // no complete element found
     150         214 :     if( (pcOpen >= pcClose) || (*pcOpen != '<') || (*pcClose != '>') )
     151             :     {
     152             :         // just append all passed characters
     153           0 :         rBuffer.append( rElement );
     154             :     }
     155             : 
     156             :     // skip parser instructions: '<![...]>'
     157         214 :     else if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') )
     158             :     {
     159             :         // do nothing
     160             :     }
     161             : 
     162             :     // replace '<br>' element with newline
     163         214 :     else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) )
     164             :     {
     165           0 :         rBuffer.append( '\n' );
     166             :     }
     167             : 
     168             :     // check start elements and simple elements for repeated attributes
     169         214 :     else if( pcOpen[ 1 ] != '/' )
     170             :     {
     171             :         // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
     172         148 :         const sal_Char* pcContentBeg = pcOpen + 1;
     173         148 :         bool bIsEmptyElement = pcClose[ -1 ] == '/';
     174         148 :         const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose;
     175             :         // append opening bracket and element name to buffer
     176         148 :         const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd );
     177         148 :         lclAppendToBuffer( rBuffer, pcOpen, pcWhiteSpace );
     178             :         // find begin of attributes, and process all attributes
     179         148 :         const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd );
     180         148 :         if( pcAttribBeg < pcContentEnd )
     181          20 :             lclProcessAttribs( rBuffer, pcAttribBeg, pcContentEnd );
     182             :         // close the element
     183         148 :         if( bIsEmptyElement )
     184          82 :             rBuffer.append( '/' );
     185         148 :         rBuffer.append( '>' );
     186             :     }
     187             : 
     188             :     // append end elements without further processing
     189             :     else
     190             :     {
     191          66 :         rBuffer.append( rElement );
     192             :     }
     193             : }
     194             : 
     195         214 : bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars )
     196             : {
     197             :     /*  MSO has a very weird way to store and handle whitespaces. The stream
     198             :         may contain lots of spaces, tabs, and newlines which have to be handled
     199             :         as single space character. This will be done in this function.
     200             : 
     201             :         If the element text contains a literal line break, it will be stored as
     202             :         <br> tag (without matching </br> element). This input stream wrapper
     203             :         will replace this element with a literal LF character (see below).
     204             : 
     205             :         A single space character for its own is stored as is. Example: The
     206             :         element
     207             :             <font> </font>
     208             :         represents a single space character. The XML parser will ignore this
     209             :         space character completely without issuing a 'characters' event. The
     210             :         VML import filter implementation has to react on this case manually.
     211             : 
     212             :         A single space character following another character is stored
     213             :         literally and must not be stipped away here. Example: The element
     214             :             <font>abc </font>
     215             :         contains the three letters a, b, and c, followed by a space character.
     216             : 
     217             :         Consecutive space characters, or a leading single space character, are
     218             :         stored in a <span> element. If there are N space characters (N > 1),
     219             :         then the <span> element contains exactly (N-1) NBSP (non-breaking
     220             :         space) characters, followed by a regular space character. Examples:
     221             :         The element
     222             :             <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
     223             :         represents 4 consecutive space characters. Has to be handled by the
     224             :         implementation. The element
     225             :             <font><span style='mso-spacerun:yes'> abc</span></font>
     226             :         represents a space characters followed by the letters a, b, c. These
     227             :         strings have to be handled by the VML import filter implementation.
     228             :      */
     229             : 
     230             :     // passed string ends with the leading opening bracket of an XML element
     231         214 :     const sal_Char* pcBeg = rChars.getStr();
     232         214 :     const sal_Char* pcEnd = pcBeg + rChars.getLength();
     233         214 :     bool bHasBracket = (pcBeg < pcEnd) && (pcEnd[ -1 ] == '<');
     234         214 :     if( bHasBracket ) --pcEnd;
     235             : 
     236             :     // skip leading whitespace
     237         214 :     const sal_Char* pcContentsBeg = lclFindNonWhiteSpace( pcBeg, pcEnd );
     238         560 :     while( pcContentsBeg < pcEnd )
     239             :     {
     240         132 :         const sal_Char* pcWhitespaceBeg = lclFindWhiteSpace( pcContentsBeg + 1, pcEnd );
     241         132 :         lclAppendToBuffer( rBuffer, pcContentsBeg, pcWhitespaceBeg );
     242         132 :         if( pcWhitespaceBeg < pcEnd )
     243           2 :             rBuffer.append( ' ' );
     244         132 :         pcContentsBeg = lclFindNonWhiteSpace( pcWhitespaceBeg, pcEnd );
     245             :     }
     246             : 
     247         214 :     return bHasBracket;
     248             : }
     249             : 
     250             : } // namespace
     251             : 
     252           8 : InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) :
     253             :     // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
     254             :     mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
     255             :     maOpeningBracket( 1 ),
     256             :     maClosingBracket( 1 ),
     257             :     maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
     258             :     maClosingCData( CREATE_OSTRING( "]]>" ) ),
     259           8 :     mnBufferPos( 0 )
     260             : {
     261           8 :     if (!mxTextStrm.is())
     262           0 :         throw IOException();
     263           8 :     maOpeningBracket[ 0 ] = '<';
     264           8 :     maClosingBracket[ 0 ] = '>';
     265           8 : }
     266             : 
     267          16 : InputStream::~InputStream()
     268             : {
     269          16 : }
     270             : 
     271          16 : sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead )
     272             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     273             : {
     274          16 :     if( nBytesToRead < 0 )
     275           0 :         throw IOException();
     276             : 
     277          16 :     rData.realloc( nBytesToRead );
     278          16 :     sal_Int8* pcDest = rData.getArray();
     279          16 :     sal_Int32 nRet = 0;
     280         246 :     while( (nBytesToRead > 0) && !mxTextStrm->isEOF() )
     281             :     {
     282         214 :         updateBuffer();
     283         214 :         sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
     284         214 :         if( nReadSize > 0 )
     285             :         {
     286         214 :             memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
     287         214 :             mnBufferPos += nReadSize;
     288         214 :             nBytesToRead -= nReadSize;
     289         214 :             nRet += nReadSize;
     290             :         }
     291             :     }
     292          16 :     if( nRet < rData.getLength() )
     293          16 :         rData.realloc( nRet );
     294          16 :     return nRet;
     295             : }
     296             : 
     297          16 : sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead )
     298             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     299             : {
     300          16 :     return readBytes( rData, nMaxBytesToRead );
     301             : }
     302             : 
     303           0 : void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip )
     304             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     305             : {
     306           0 :     if( nBytesToSkip < 0 )
     307           0 :         throw IOException();
     308             : 
     309           0 :     while( (nBytesToSkip > 0) && !mxTextStrm->isEOF() )
     310             :     {
     311           0 :         updateBuffer();
     312           0 :         sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
     313           0 :         mnBufferPos += nSkipSize;
     314           0 :         nBytesToSkip -= nSkipSize;
     315             :     }
     316           0 : }
     317             : 
     318           8 : sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException, std::exception)
     319             : {
     320           8 :     updateBuffer();
     321           8 :     return maBuffer.getLength() - mnBufferPos;
     322             : }
     323             : 
     324           0 : void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException, std::exception)
     325             : {
     326           0 :     mxTextStrm->closeInput();
     327           0 : }
     328             : 
     329             : // private --------------------------------------------------------------------
     330             : 
     331         222 : void InputStream::updateBuffer() throw (IOException, RuntimeException)
     332             : {
     333         658 :     while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() )
     334             :     {
     335             :         // collect new contents in a string buffer
     336         214 :         OStringBuffer aBuffer;
     337             : 
     338             :         // read and process characters until the opening bracket of the next XML element
     339         428 :         OString aChars = readToElementBegin();
     340         214 :         bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars );
     341             : 
     342             :         // read and process characters until (and including) closing bracket (an XML element)
     343             :         OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
     344         214 :         if( bHasOpeningBracket && !mxTextStrm->isEOF() )
     345             :         {
     346             :             // read the element text (add the leading opening bracket manually)
     347         214 :             OString aElement = OString( '<' ) + readToElementEnd();
     348             :             // check for CDATA part, starting with '<![CDATA['
     349         214 :             if( aElement.match( maOpeningCData ) )
     350             :             {
     351             :                 // search the end tag ']]>'
     352           0 :                 while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.endsWith( maClosingCData )) && !mxTextStrm->isEOF() )
     353           0 :                     aElement += readToElementEnd();
     354             :                 // copy the entire CDATA part
     355           0 :                 aBuffer.append( aElement );
     356             :             }
     357             :             else
     358             :             {
     359             :                 // no CDATA part - process the contents of the element
     360         214 :                 lclProcessElement( aBuffer, aElement );
     361         214 :             }
     362             :         }
     363             : 
     364         214 :         maBuffer = aBuffer.makeStringAndClear();
     365         214 :         mnBufferPos = 0;
     366         214 :     }
     367         222 : }
     368             : 
     369         214 : OString InputStream::readToElementBegin() throw (IOException, RuntimeException)
     370             : {
     371         214 :     return OUStringToOString( mxTextStrm->readString( maOpeningBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     372             : }
     373             : 
     374         214 : OString InputStream::readToElementEnd() throw (IOException, RuntimeException)
     375             : {
     376         214 :     OString aText = OUStringToOString( mxTextStrm->readString( maClosingBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     377             :     OSL_ENSURE( aText.endsWith(">"), "InputStream::readToElementEnd - missing closing bracket of XML element" );
     378         214 :     return aText;
     379             : }
     380             : 
     381             : } // namespace vml
     382             : } // namespace oox
     383             : 
     384             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10