LCOV - code coverage report
Current view: top level - oox/source/vml - vmlinputstream.cxx (source / functions) Hit Total Coverage
Test: commit e02a6cb2c3e2b23b203b422e4e0680877f232636 Lines: 0 152 0.0 %
Date: 2014-04-14 Functions: 0 20 0.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "oox/vml/vmlinputstream.hxx"
      21             : 
      22             : #include <com/sun/star/io/XTextInputStream2.hpp>
      23             : #include <map>
      24             : #include <string.h>
      25             : #include <rtl/strbuf.hxx>
      26             : #include "oox/helper/helper.hxx"
      27             : #include "oox/helper/textinputstream.hxx"
      28             : 
      29             : namespace oox {
      30             : namespace vml {
      31             : 
      32             : 
      33             : 
      34             : using namespace ::com::sun::star::io;
      35             : using namespace ::com::sun::star::uno;
      36             : 
      37             : 
      38             : 
      39             : namespace {
      40             : /** Looks up cChar in the character range [pcBeg, pcEnd) by means of
                     :     rtl_str_indexOfChar_WithLength().
                     :
                     :     @return  pcBeg advanced by the reported index, or pcEnd if the
                     :         reported index is negative.
                     :  */
      41           0 : inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar )
      42             : {
      43           0 :     sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar );
      44           0 :     return (nIndex < 0) ? pcEnd : (pcBeg + nIndex);
      45             : }
      46             : /** Classifies the control characters (values 0 to 31) as whitespace.
                     :
                     :     The explicit lower bound keeps the result independent of the
                     :     signedness of sal_Char: without it, bytes >= 0x80 (for example the
                     :     NBSP 0xA0 of ISO-8859-1) are negative wherever the character type is
                     :     signed and would be mistaken for whitespace.
                     :
                     :     The space character itself (value 32) is not matched.
                     :     NOTE(review): the upper bound is kept as found - confirm that
                     :     excluding the blank is intended.
                     :  */
      47           0 : inline bool lclIsWhiteSpace( sal_Char cChar )
      48             : {
      49           0 :     return (cChar >= 0) && (cChar < 32);
      50             : }
      51             : /** Scans [pcBeg, pcEnd) from the front for a character accepted by
                     :     lclIsWhiteSpace().
                     :
                     :     @return  Pointer to the first such character, or pcEnd if there is
                     :         none.
                     :  */
      52           0 : const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      53             : {
      54           0 :     for( ; pcBeg < pcEnd; ++pcBeg )
      55           0 :         if( lclIsWhiteSpace( *pcBeg ) )
      56           0 :             return pcBeg;
      57           0 :     return pcEnd;
      58             : }
      59             : /** Scans [pcBeg, pcEnd) from the front for a character that is rejected by
                     :     lclIsWhiteSpace().
                     :
                     :     @return  Pointer to the first such character, or pcEnd if the range
                     :         consists of whitespace only (or is empty).
                     :  */
      60           0 : const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      61             : {
      62           0 :     for( ; pcBeg < pcEnd; ++pcBeg )
      63           0 :         if( !lclIsWhiteSpace( *pcBeg ) )
      64           0 :             return pcBeg;
      65           0 :     return pcEnd;
      66             : }
      67             : /** Moves the end of the range [pcBeg, pcEnd) backwards over all trailing
                     :     characters accepted by lclIsWhiteSpace().
                     :
                     :     @return  The new end pointer; it never moves in front of pcBeg.
                     :  */
      68           0 : const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd )
      69             : {
      70           0 :     while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) )
      71           0 :         --pcEnd;
      72           0 :     return pcEnd;
      73             : }
      74             : /** Appends the characters of the range [pcBeg, pcEnd) to rBuffer. */
      75           0 : inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      76             : {
      77           0 :     rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) );
      78           0 : }
      79             : 
      80             : 
      81             : /** Parses the attribute list in [pcBeg, pcEnd) and appends it to rBuffer
                     :     with repeated attribute names removed.
                     :
                     :     Every attribute must have the form name=value, where the value is
                     :     enclosed in single or double quotes. Characters accepted by
                     :     lclIsWhiteSpace() may surround the equality sign, and one such
                     :     character must follow each value unless the value ends the range.
                     :     If a name occurs more than once, only its last definition is kept.
                     :
                     :     On success, every kept attribute is appended with one leading space
                     :     character. If the text cannot be parsed completely, the range
                     :     [pcBeg, pcEnd) is appended unchanged instead, without a leading
                     :     space character.
                     :  */
      82           0 : void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      83             : {
      84             :     /*  Map attribute names to char-pointer of all attributes. This map is used
      85             :         to find multiple occurrences of attributes with the same name. The
      86             :         mapped pointers are used as map key in the next map below. */
      87             :     typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
      88           0 :     AttributeNameMap aAttributeNames;
      89             : 
      90             :     /*  Map the char-pointers of all attributes to the full attribute definition
      91             :         string. This preserves the original order of the used attributes. */
      92             :     typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
      93           0 :     AttributeDataMap aAttributes;
      94             : 
      95           0 :     bool bOk = true;
      96           0 :     const sal_Char* pcNameBeg = pcBeg;
      97           0 :     while( bOk && (pcNameBeg < pcEnd) )
      98             :     {
      99             :         // pcNameBeg points to begin of attribute name, find equality sign
     100           0 :         const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
     101           0 :         if ((bOk = (pcEqualSign < pcEnd)) == true)
     102             :         {
     103             :             // find end of attribute name (ignore whitespace between name and equality sign)
     104           0 :             const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
     105           0 :             if( (bOk = (pcNameBeg < pcNameEnd)) == true )
     106             :             {
     107             :                 // find begin of attribute value (must be single or double quote)
     108           0 :                 const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
     109           0 :                 if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) == true )
     110             :                 {
     111             :                     // find end of attribute value (matching quote character)
     112           0 :                     const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
     113           0 :                     if( (bOk = (pcValueEnd < pcEnd)) == true )
     114             :                     {
     115           0 :                         ++pcValueEnd; // step behind the closing quote, so that it becomes part of the attribute data
     116           0 :                         OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
     117           0 :                         OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) );
     118             :                         // search for an existing attribute with the same name
     119           0 :                         AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
     120             :                         // remove its definition from the data map
     121           0 :                         if( aIt != aAttributeNames.end() )
     122           0 :                             aAttributes.erase( aIt->second );
     123             :                         // insert the attribute into both maps
     124           0 :                         aAttributeNames[ aAttribName ] = pcNameBeg;
     125           0 :                         aAttributes[ pcNameBeg ] = aAttribData;
     126             :                         // continue with next attribute (skip whitespace after this attribute);
     127           0 :                         pcNameBeg = pcValueEnd;
     128           0 :                         if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg )) == true) ) // anything but whitespace directly behind the value is an error
     129           0 :                             pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
     130             :                     }
     131             :                 }
     132             :             }
     133             :         }
     134             :     }
     135             : 
     136             :     // if no error has occurred, build the resulting attribute list
     137           0 :     if( bOk )
     138           0 :         for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
     139           0 :             rBuffer.append( ' ' ).append( aIt->second );
     140             :     // on error, just append the complete passed string
     141             :     else
     142           0 :         lclAppendToBuffer( rBuffer, pcBeg, pcEnd );
     143           0 : }
     144             : /** Appends a cleaned-up copy of the element string rElement to rBuffer.
                     :
                     :     The following cases are distinguished, in this order:
                     :     - empty string: nothing is appended;
                     :     - string not enclosed in '<' and '>': appended unchanged;
                     :     - parser instruction '<![...]>': dropped;
                     :     - '<br' followed only by characters accepted by lclIsWhiteSpace()
                     :       up to the closing bracket: replaced with one LF character;
                     :     - any other element that does not start with '</': the text up to
                     :       the first whitespace character is copied, the remaining text is
                     :       passed to lclProcessAttribs(), and the element is closed again
                     :       (including the slash of an empty element);
                     :     - end element: appended unchanged.
                     :
                     :     Whitespace between the element name and the attribute text is not
                     :     copied by this function itself.
                     :  */
     145           0 : void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement )
     146             : {
     147             :     // check that passed string starts and ends with the brackets of an XML element
     148           0 :     sal_Int32 nElementLen = rElement.getLength();
     149           0 :     if( nElementLen == 0 )
     150           0 :         return;
     151             : 
     152           0 :     const sal_Char* pcOpen = rElement.getStr();
     153           0 :     const sal_Char* pcClose = pcOpen + nElementLen - 1;
     154             : 
     155             :     // no complete element found
     156           0 :     if( (pcOpen >= pcClose) || (*pcOpen != '<') || (*pcClose != '>') )
     157             :     {
     158             :         // just append all passed characters
     159           0 :         rBuffer.append( rElement );
     160             :     }
     161             : 
     162             :     // skip parser instructions: '<![...]>'
     163           0 :     else if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') )
     164             :     {
     165             :         // do nothing
     166             :     }
     167             : 
     168             :     // replace '<br>' element with newline
     169           0 :     else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) )
     170             :     {
     171           0 :         rBuffer.append( '\n' );
     172             :     }
     173             : 
     174             :     // check start elements and simple elements for repeated attributes
     175           0 :     else if( pcOpen[ 1 ] != '/' )
     176             :     {
     177             :         // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
     178           0 :         const sal_Char* pcContentBeg = pcOpen + 1;
     179           0 :         bool bIsEmptyElement = pcClose[ -1 ] == '/';
     180           0 :         const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose;
     181             :         // append opening bracket and element name to buffer
     182           0 :         const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd );
     183           0 :         lclAppendToBuffer( rBuffer, pcOpen, pcWhiteSpace );
     184             :         // find begin of attributes, and process all attributes
     185           0 :         const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd );
     186           0 :         if( pcAttribBeg < pcContentEnd )
     187           0 :             lclProcessAttribs( rBuffer, pcAttribBeg, pcContentEnd );
     188             :         // close the element
     189           0 :         if( bIsEmptyElement )
     190           0 :             rBuffer.append( '/' );
     191           0 :         rBuffer.append( '>' );
     192             :     }
     193             : 
     194             :     // append end elements without further processing
     195             :     else
     196             :     {
     197           0 :         rBuffer.append( rElement );
     198             :     }
     199             : }
     200             : /** Appends the character data contained in rChars to rBuffer and collapses
                     :     whitespace (as defined by lclIsWhiteSpace()) on the way.
                     :
                     :     Leading whitespace is dropped. Every other run of whitespace,
                     :     including a trailing one, is replaced with one space character. An
                     :     opening bracket '<' at the very end of rChars is not copied.
                     :
                     :     @return  True, if rChars ended with an opening bracket '<'.
                     :  */
     201           0 : bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars )
     202             : {
     203             :     /*  MSO has a very weird way to store and handle whitespaces. The stream
     204             :         may contain lots of spaces, tabs, and newlines which have to be handled
     205             :         as single space character. This will be done in this function.
     206             : 
     207             :         If the element text contains a literal line break, it will be stored as
     208             :         <br> tag (without matching </br> element). This input stream wrapper
     209             :         will replace this element with a literal LF character (see below).
     210             : 
     211             :         A single space character for its own is stored as is. Example: The
     212             :         element
     213             :             <font> </font>
     214             :         represents a single space character. The XML parser will ignore this
     215             :         space character completely without issuing a 'characters' event. The
     216             :         VML import filter implementation has to react on this case manually.
     217             : 
     218             :         A single space character following another character is stored
     219             :         literally and must not be stripped away here. Example: The element
     220             :             <font>abc </font>
     221             :         contains the three letters a, b, and c, followed by a space character.
     222             : 
     223             :         Consecutive space characters, or a leading single space character, are
     224             :         stored in a <span> element. If there are N space characters (N > 1),
     225             :         then the <span> element contains exactly (N-1) NBSP (non-breaking
     226             :         space) characters, followed by a regular space character. Examples:
     227             :         The element
     228             :             <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
     229             :         represents 4 consecutive space characters. Has to be handled by the
     230             :         implementation. The element
     231             :             <font><span style='mso-spacerun:yes'> abc</span></font>
     232             :         represents a space character followed by the letters a, b, c. These
     233             :         strings have to be handled by the VML import filter implementation.
     234             :      */
     235             : 
     236             :     // passed string ends with the leading opening bracket of an XML element
     237           0 :     const sal_Char* pcBeg = rChars.getStr();
     238           0 :     const sal_Char* pcEnd = pcBeg + rChars.getLength();
     239           0 :     bool bHasBracket = (pcBeg < pcEnd) && (pcEnd[ -1 ] == '<');
     240           0 :     if( bHasBracket ) --pcEnd;
     241             : 
     242             :     // skip leading whitespace
     243           0 :     const sal_Char* pcContentsBeg = lclFindNonWhiteSpace( pcBeg, pcEnd );
     244           0 :     while( pcContentsBeg < pcEnd )
     245             :     {
     246           0 :         const sal_Char* pcWhitespaceBeg = lclFindWhiteSpace( pcContentsBeg + 1, pcEnd );
     247           0 :         lclAppendToBuffer( rBuffer, pcContentsBeg, pcWhitespaceBeg );
     248           0 :         if( pcWhitespaceBeg < pcEnd ) // a whitespace run follows: emit one space in its place
     249           0 :             rBuffer.append( ' ' );
     250           0 :         pcContentsBeg = lclFindNonWhiteSpace( pcWhitespaceBeg, pcEnd );
     251             :     }
     252             : 
     253           0 :     return bHasBracket;
     254             : }
     255             : 
     256             : } // namespace
     257             : 
     258             : 
     259             : /** Wraps rxInStrm into a text input stream obtained from
                     :     TextInputStream::createXTextInputStream() and prepares the helper
                     :     members used for scanning: the delimiter sequences maOpeningBracket
                     :     and maClosingBracket (one element each, set to '<' and '>'), the
                     :     CDATA start and end markers, and the read position in the (still
                     :     empty) buffer.
                     :  */
     260           0 : InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) :
     261             :     // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
     262             :     mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
     263             :     maOpeningBracket( 1 ),
     264             :     maClosingBracket( 1 ),
     265             :     maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
     266             :     maClosingCData( CREATE_OSTRING( "]]>" ) ),
     267           0 :     mnBufferPos( 0 )
     268             : {
     269           0 :     maOpeningBracket[ 0 ] = '<';
     270           0 :     maClosingBracket[ 0 ] = '>';
     271           0 : }
     272             : /** Destructor; the body is empty, nothing is released explicitly here. */
     273           0 : InputStream::~InputStream()
     274             : {
     275           0 : }
     276             : /** Reads up to nBytesToRead bytes of the processed stream data into rData.
                     :
                     :     rData is resized to nBytesToRead first and shrunk to the number of
                     :     bytes actually delivered afterwards. The internal buffer is refilled
                     :     by updateBuffer() as often as needed.
                     :
                     :     The loop keeps running while unread data is left in maBuffer, even
                     :     if the text stream has already reported EOF. Checking isEOF() alone
                     :     would drop the rest of the last buffered chunk whenever a caller has
                     :     asked for fewer bytes than were buffered.
                     :
                     :     @return  The number of bytes copied into rData.
                     :     @throws IOException  if nBytesToRead is negative.
                     :  */
     277           0 : sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead )
     278             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     279             : {
     280           0 :     if( nBytesToRead < 0 )
     281           0 :         throw IOException();
     282             : 
     283           0 :     rData.realloc( nBytesToRead );
     284           0 :     sal_Int8* pcDest = rData.getArray();
     285           0 :     sal_Int32 nRet = 0;
     286           0 :     while( (nBytesToRead > 0) && ((mnBufferPos < maBuffer.getLength()) || !mxTextStrm->isEOF()) )
     287             :     {
     288           0 :         updateBuffer();
     289           0 :         sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
     290           0 :         if( nReadSize > 0 )
     291             :         {
     292           0 :             memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
     293           0 :             mnBufferPos += nReadSize;
     294           0 :             nBytesToRead -= nReadSize;
     295           0 :             nRet += nReadSize;
     296             :         }
     297             :     }
     298           0 :     if( nRet < rData.getLength() )
     299           0 :         rData.realloc( nRet );
     300           0 :     return nRet;
     301             : }
     302             : /** Delegates to readBytes() with the same arguments and returns its result. */
     303           0 : sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead )
     304             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     305             : {
     306           0 :     return readBytes( rData, nMaxBytesToRead );
     307             : }
     308             : /** Skips up to nBytesToSkip bytes of the processed stream data.
                     :
                     :     The internal buffer is refilled by updateBuffer() as often as
                     :     needed. The loop keeps running while unread data is left in
                     :     maBuffer, even if the text stream has already reported EOF, so that
                     :     the rest of the last buffered chunk can still be skipped.
                     :
                     :     @throws IOException  if nBytesToSkip is negative.
                     :  */
     309           0 : void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip )
     310             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException, std::exception)
     311             : {
     312           0 :     if( nBytesToSkip < 0 )
     313           0 :         throw IOException();
     314             : 
     315           0 :     while( (nBytesToSkip > 0) && ((mnBufferPos < maBuffer.getLength()) || !mxTextStrm->isEOF()) )
     316             :     {
     317           0 :         updateBuffer();
     318           0 :         sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
     319           0 :         mnBufferPos += nSkipSize;
     320           0 :         nBytesToSkip -= nSkipSize;
     321             :     }
     322           0 : }
     323             : /** Refills the internal buffer via updateBuffer() and returns the number
                     :     of bytes that are left unread in it. Only the currently buffered
                     :     chunk is counted, not the data remaining in the wrapped stream.
                     :  */
     324           0 : sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException, std::exception)
     325             : {
     326           0 :     updateBuffer();
     327           0 :     return maBuffer.getLength() - mnBufferPos;
     328             : }
     329             : /** Forwards the call to closeInput() of the wrapped text input stream. */
     330           0 : void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException, std::exception)
     331             : {
     332           0 :     mxTextStrm->closeInput();
     333           0 : }
     334             : 
     335             : // private --------------------------------------------------------------------
     336             : /** Refills maBuffer after it has been consumed completely.
                     :
                     :     Each pass reads the text up to the next opening bracket (processed
                     :     by lclProcessCharacters()) and, if a bracket has been found and the
                     :     stream is not at EOF, the element up to its closing bracket. CDATA
                     :     sections are extended up to the end marker and copied as they are;
                     :     all other elements are processed by lclProcessElement(). The passes
                     :     are repeated as long as the result is empty and the text stream is
                     :     not at EOF. Nothing happens if unread data is still left in maBuffer.
                     :  */
     337           0 : void InputStream::updateBuffer() throw (IOException, RuntimeException)
     338             : {
     339           0 :     while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() )
     340             :     {
     341             :         // collect new contents in a string buffer
     342           0 :         OStringBuffer aBuffer;
     343             : 
     344             :         // read and process characters until the opening bracket of the next XML element
     345           0 :         OString aChars = readToElementBegin();
     346           0 :         bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars );
     347             : 
     348             :         // read and process characters until (and including) closing bracket (an XML element)
     349             :         OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
     350           0 :         if( bHasOpeningBracket && !mxTextStrm->isEOF() )
     351             :         {
     352             :             // read the element text (add the leading opening bracket manually)
     353           0 :             OString aElement = OString( '<' ) + readToElementEnd();
     354             :             // check for CDATA part, starting with '<![CDATA['
     355           0 :             if( aElement.match( maOpeningCData ) )
     356             :             {
     357             :                 // search the end tag ']]>'
     358           0 :                 while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.endsWith( maClosingCData )) && !mxTextStrm->isEOF() )
     359           0 :                     aElement += readToElementEnd();
     360             :                 // copy the entire CDATA part
     361           0 :                 aBuffer.append( aElement );
     362             :             }
     363             :             else
     364             :             {
     365             :                 // no CDATA part - process the contents of the element
     366           0 :                 lclProcessElement( aBuffer, aElement );
     367           0 :             }
     368             :         }
     369             : 
     370           0 :         maBuffer = aBuffer.makeStringAndClear();
     371           0 :         mnBufferPos = 0;
     372           0 :     }
     373           0 : }
     374             : /** Reads the next string from the text stream with maOpeningBracket passed
                     :     as delimiters, and converts it to an ISO-8859-1 byte string.
                     :     NOTE(review): the caller expects the opening bracket to be the last
                     :     character of the result if one was found - confirm against the
                     :     contract of readString().
                     :  */
     375           0 : OString InputStream::readToElementBegin() throw (IOException, RuntimeException)
     376             : {
     377           0 :     return OUStringToOString( mxTextStrm->readString( maOpeningBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     378             : }
     379             : /** Reads the next string from the text stream with maClosingBracket passed
                     :     as delimiters, and converts it to an ISO-8859-1 byte string. A result
                     :     that does not end with a closing bracket triggers the OSL_ENSURE
                     :     diagnostic below, but is returned nevertheless.
                     :  */
     380           0 : OString InputStream::readToElementEnd() throw (IOException, RuntimeException)
     381             : {
     382           0 :     OString aText = OUStringToOString( mxTextStrm->readString( maClosingBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     383             :     OSL_ENSURE( aText.endsWith(">"), "InputStream::readToElementEnd - missing closing bracket of XML element" );
     384           0 :     return aText;
     385             : }
     386             : 
     387             : 
     388             : 
     389             : } // namespace vml
     390             : } // namespace oox
     391             : 
     392             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10