LCOV - code coverage report
Current view: top level - libreoffice/oox/source/vml - vmlinputstream.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 129 152 84.9 %
Date: 2012-12-27 Functions: 17 20 85.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "oox/vml/vmlinputstream.hxx"
      21             : 
      22             : #include <com/sun/star/io/XTextInputStream.hpp>
      23             : #include <map>
      24             : #include <string.h>
      25             : #include <rtl/strbuf.hxx>
      26             : #include "oox/helper/helper.hxx"
      27             : #include "oox/helper/textinputstream.hxx"
      28             : 
      29             : namespace oox {
      30             : namespace vml {
      31             : 
      32             : // ============================================================================
      33             : 
      34             : using namespace ::com::sun::star::io;
      35             : using namespace ::com::sun::star::uno;
      36             : 
      37             : using ::rtl::OString;
      38             : using ::rtl::OStringBuffer;
      39             : 
      40             : // ============================================================================
      41             : 
      42             : namespace {
      43             : 
      44           8 : inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar ) // Finds the first occurrence of cChar in [pcBeg, pcEnd); returns pcEnd when there is none.
      45             : {
      46           8 :     sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar ); // the searched length is the pointer distance, narrowed to sal_Int32
      47           8 :     return (nIndex < 0) ? pcEnd : (pcBeg + nIndex); // a negative index is mapped to the end pointer
      48             : }
      49             : 
      50        1839 : inline bool lclIsWhiteSpace( sal_Char cChar ) // Returns true for control characters (byte values 0..31) only. NOTE(review): the space character (32) is not matched; that bound is kept as found - confirm it is intended.
      51             : {
      52        1839 :     return static_cast< unsigned char >( cChar ) < 32; // compare as unsigned: where sal_Char is a signed type, bytes 0x80..0xFF (e.g. NBSP 0xA0) would otherwise be negative and wrongly count as whitespace
      53             : }
      54             : 
      55          75 : const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd ) // Returns the first position in [pcBeg, pcEnd) for which lclIsWhiteSpace() is true, or pcEnd if there is none.
      56             : {
      57        1808 :     for( ; pcBeg < pcEnd; ++pcBeg ) // pcBeg is a by-value copy and serves as the scan cursor
      58        1737 :         if( lclIsWhiteSpace( *pcBeg ) )
      59           4 :             return pcBeg;
      60          71 :     return pcEnd;
      61             : }
      62             : 
      63         145 : const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd ) // Returns the first position in [pcBeg, pcEnd) for which lclIsWhiteSpace() is false, or pcEnd if there is none.
      64             : {
      65         200 :     for( ; pcBeg < pcEnd; ++pcBeg ) // pcBeg is a by-value copy and serves as the scan cursor
      66          96 :         if( !lclIsWhiteSpace( *pcBeg ) )
      67          41 :             return pcBeg;
      68         104 :     return pcEnd; // also reached for an empty range
      69             : }
      70             : 
      71           4 : const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd ) // Returns the end pointer of [pcBeg, pcEnd) after dropping trailing characters matched by lclIsWhiteSpace().
      72             : {
      73           8 :     while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) ) // pcEnd is exclusive, so pcEnd[ -1 ] is the last character of the range
      74           0 :         --pcEnd;
      75           4 :     return pcEnd; // equals pcBeg when the range is empty or consists of whitespace only
      76             : }
      77             : 
      78          76 : inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd ) // Appends the characters of the range [pcBeg, pcEnd) to rBuffer.
      79             : {
      80          76 :     rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) ); // the length is the pointer distance, narrowed to sal_Int32
      81          76 : }
      82             : 
      83             : // ----------------------------------------------------------------------------
      84             : 
      85           3 : void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd ) // Parses the attribute list in [pcBeg, pcEnd), keeps only the last definition of a repeated attribute name, and appends the result (with a leading space) to rBuffer.
      86             : {
      87             :     /*  Map attribute names to char-pointer of all attributes. This map is used
      88             :         to find multiple occurrences of attributes with the same name. The
      89             :         mapped pointers are used as map key in the next map below. */
      90             :     typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
      91           3 :     AttributeNameMap aAttributeNames;
      92             : 
      93             :     /*  Map the char-pointers of all attributes to the full attribute definition
      94             :         string. This preserves the original order of the used attributes. */
      95             :     typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
      96           3 :     AttributeDataMap aAttributes;
      97             : 
      98           3 :     bool bOk = true; // cleared by the first syntax problem, which also ends the loop
      99           3 :     const sal_Char* pcNameBeg = pcBeg;
     100          10 :     while( bOk && (pcNameBeg < pcEnd) )
     101             :     {
     102             :         // pcNameBeg points to begin of attribute name, find equality sign
     103           4 :         const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
     104           4 :         if( (bOk = pcEqualSign < pcEnd) == true )
     105             :         {
     106             :             // find end of attribute name (ignore whitespace between name and equality sign)
     107           4 :             const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
     108           4 :             if( (bOk = pcNameBeg < pcNameEnd) == true )
     109             :             {
     110             :                 // find begin of attribute value (must be single or double quote)
     111           4 :                 const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
     112           4 :                 if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) == true )
     113             :                 {
     114             :                     // find end of attribute value (matching quote character)
     115           4 :                     const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
     116           4 :                     if( (bOk = pcValueEnd < pcEnd) == true )
     117             :                     {
     118           4 :                         ++pcValueEnd; // move past the closing quote so that it becomes part of the attribute data
     119           4 :                         OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
     120           4 :                         OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) ); // name, equality sign, and quoted value exactly as found in the input
     121             :                         // search for an existing attribute with the same name
     122           4 :                         AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
     123             :                         // remove its definition from the data map
     124           4 :                         if( aIt != aAttributeNames.end() )
     125           0 :                             aAttributes.erase( aIt->second ); // only the most recent definition of a name is kept
     126             :                         // insert the attribute into both maps
     127           4 :                         aAttributeNames[ aAttribName ] = pcNameBeg;
     128           4 :                         aAttributes[ pcNameBeg ] = aAttribData;
     129             :                         // continue with next attribute (skip whitespace after this attribute)
     130           4 :                         pcNameBeg = pcValueEnd;
     131           4 :                         if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg )) == true) ) // anything but whitespace directly behind the closing quote is treated as an error
     132           1 :                             pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
     133             :                     }
     134             :                 }
     135             :             }
     136             :         }
     137             :     }
     138             : 
     139             :     // if no error has occurred, build the resulting attribute list
     140           3 :     if( bOk )
     141           5 :         for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
     142           3 :             rBuffer.append( ' ' ).append( aIt->second ); // every attribute is prefixed with one space character
     143             :     // on error, append the complete passed string unchanged, but separate it from the preceding buffer contents with a space (as the success path does)
     144             :     else
     145           1 :         lclAppendToBuffer( rBuffer.append( ' ' ), pcBeg, pcEnd ); // without the space, the raw attribute text would be glued to the element name already in rBuffer
     146           3 : }
     147             : 
     148          65 : void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement ) // Appends a cleaned-up version of the XML element text rElement (expected form '<...>') to rBuffer.
     149             : {
     150             :     // check that passed string starts and ends with the brackets of an XML element
     151          65 :     sal_Int32 nElementLen = rElement.getLength();
     152          65 :     if( nElementLen == 0 )
     153          65 :         return; // nothing is appended for an empty string
     154             : 
     155          65 :     const sal_Char* pcOpen = rElement.getStr();
     156          65 :     const sal_Char* pcClose = pcOpen + nElementLen - 1; // points to the last character, which is expected to be '>'
     157             : 
     158             :     // no complete element found
     159          65 :     if( (pcOpen >= pcClose) || (*pcOpen != '<') || (*pcClose != '>') )
     160             :     {
     161             :         // just append all passed characters
     162           0 :         rBuffer.append( rElement );
     163             :     }
     164             : 
     165             :     // skip parser instructions: '<![...]>'
     166          65 :     else if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') ) // the shortest text matching this pattern is '<![]>' (5 characters)
     167             :     {
     168             :         // do nothing
     169             :     }
     170             : 
     171             :     // replace '<br>' element with newline
     172          65 :     else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) ) // only characters matched by lclIsWhiteSpace() may stand between 'br' and the closing bracket
     173             :     {
     174           0 :         rBuffer.append( '\n' );
     175             :     }
     176             : 
     177             :     // check start elements and simple elements for repeated attributes
     178          65 :     else if( pcOpen[ 1 ] != '/' )
     179             :     {
     180             :         // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
     181          42 :         const sal_Char* pcContentBeg = pcOpen + 1;
     182          42 :         bool bIsEmptyElement = pcClose[ -1 ] == '/';
     183          42 :         const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose;
     184             :         // append opening bracket and element name to buffer
     185          42 :         const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd ); // the element name ends at the first whitespace, or at pcContentEnd if there is none
     186          42 :         lclAppendToBuffer( rBuffer, pcOpen, pcWhiteSpace );
     187             :         // find begin of attributes, and process all attributes
     188          42 :         const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd ); // the whitespace between element name and attributes is not copied to the buffer
     189          42 :         if( pcAttribBeg < pcContentEnd )
     190           3 :             lclProcessAttribs( rBuffer, pcAttribBeg, pcContentEnd );
     191             :         // close the element
     192          42 :         if( bIsEmptyElement )
     193          19 :             rBuffer.append( '/' ); // re-add the slash that was excluded from the content range above
     194          42 :         rBuffer.append( '>' );
     195             :     }
     196             : 
     197             :     // append end elements without further processing
     198             :     else
     199             :     {
     200          23 :         rBuffer.append( rElement );
     201             :     }
     202             : }
     203             : 
     204          65 : bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars ) // Appends the whitespace-normalized character data from rChars to rBuffer; returns true if rChars ended with '<'.
     205             : {
     206             :     /*  MSO has a very weird way to store and handle whitespaces. The stream
     207             :         may contain lots of spaces, tabs, and newlines which have to be handled
     208             :         as single space character. This will be done in this function.
     209             : 
     210             :         If the element text contains a literal line break, it will be stored as
     211             :         <br> tag (without matching </br> element). This input stream wrapper
     212             :         will replace this element with a literal LF character (see below).
     213             : 
     214             :         A single space character for its own is stored as is. Example: The
     215             :         element
     216             :             <font> </font>
     217             :         represents a single space character. The XML parser will ignore this
     218             :         space character completely without issuing a 'characters' event. The
     219             :         VML import filter implementation has to react on this case manually.
     220             : 
     221             :         A single space character following another character is stored
     222             :         literally and must not be stripped away here. Example: The element
     223             :             <font>abc </font>
     224             :         contains the three letters a, b, and c, followed by a space character.
     225             : 
     226             :         Consecutive space characters, or a leading single space character, are
     227             :         stored in a <span> element. If there are N space characters (N > 1),
     228             :         then the <span> element contains exactly (N-1) NBSP (non-breaking
     229             :         space) characters, followed by a regular space character. Examples:
     230             :         The element
     231             :             <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
     232             :         represents 4 consecutive space characters. Has to be handled by the
     233             :         implementation. The element
     234             :             <font><span style='mso-spacerun:yes'> abc</span></font>
     235             :         represents a space character followed by the letters a, b, c. These
     236             :         strings have to be handled by the VML import filter implementation.
     237             :      */
     238             : 
     239             :     // passed string ends with the leading opening bracket of an XML element
     240          65 :     const sal_Char* pcBeg = rChars.getStr();
     241          65 :     const sal_Char* pcEnd = pcBeg + rChars.getLength();
     242          65 :     bool bHasBracket = (pcBeg < pcEnd) && (pcEnd[ -1 ] == '<');
     243          65 :     if( bHasBracket ) --pcEnd; // the trailing '<' is not character data and is excluded from the processed range
     244             : 
     245             :     // skip leading whitespace
     246          65 :     const sal_Char* pcContentsBeg = lclFindNonWhiteSpace( pcBeg, pcEnd ); // "whitespace" here and below means exactly the characters matched by lclIsWhiteSpace()
     247         163 :     while( pcContentsBeg < pcEnd )
     248             :     {
     249          33 :         const sal_Char* pcWhitespaceBeg = lclFindWhiteSpace( pcContentsBeg + 1, pcEnd ); // pcContentsBeg itself is known to be non-whitespace, so the search starts behind it
     250          33 :         lclAppendToBuffer( rBuffer, pcContentsBeg, pcWhitespaceBeg );
     251          33 :         if( pcWhitespaceBeg < pcEnd )
     252           1 :             rBuffer.append( ' ' ); // a whitespace run following the text (inner or trailing) is replaced by one space character
     253          33 :         pcContentsBeg = lclFindNonWhiteSpace( pcWhitespaceBeg, pcEnd );
     254             :     }
     255             : 
     256          65 :     return bHasBracket;
     257             : }
     258             : 
     259             : } // namespace
     260             : 
     261             : // ============================================================================
     262             : 
     263           2 : InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) : // Wraps rxInStrm in a text input stream and prepares the '<' and '>' delimiters and the CDATA marker strings.
     264             :     // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
     265             :     mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
     266             :     maOpeningBracket( 1 ), // constructed with size argument 1; element 0 is assigned in the constructor body
     267             :     maClosingBracket( 1 ), // constructed with size argument 1; element 0 is assigned in the constructor body
     268             :     maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
     269             :     maClosingCData( CREATE_OSTRING( "]]>" ) ),
     270           2 :     mnBufferPos( 0 )
     271             : {
     272           2 :     maOpeningBracket[ 0 ] = '<'; // delimiter passed to readString() in readToElementBegin()
     273           2 :     maClosingBracket[ 0 ] = '>'; // delimiter passed to readString() in readToElementEnd()
     274           2 : }
     275             : 
     276           4 : InputStream::~InputStream() // The destructor body contains no statements.
     277             : {
     278           4 : }
     279             : 
     280           4 : sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead ) // Copies up to nBytesToRead processed bytes into rData and returns the number of bytes copied; throws IOException for a negative nBytesToRead.
     281             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
     282             : {
     283           4 :     if( nBytesToRead < 0 )
     284           0 :         throw IOException();
     285             : 
     286           4 :     rData.realloc( nBytesToRead ); // reserve room for the full request, shrunk again below if less data is available
     287           4 :     sal_Int8* pcDest = rData.getArray();
     288           4 :     sal_Int32 nRet = 0;
     289          73 :     while( (nBytesToRead > 0) && ((mnBufferPos < maBuffer.getLength()) || !mxTextStrm->isEOF()) ) // keep draining bytes that are still buffered even after the text stream has reported EOF, otherwise the tail of the last chunk is lost
     290             :     {
     291          65 :         updateBuffer(); // refills maBuffer only if it is used up and the text stream is not at EOF
     292          65 :         sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
     293          65 :         if( nReadSize > 0 )
     294             :         {
     295          65 :             memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
     296          65 :             mnBufferPos += nReadSize;
     297          65 :             nBytesToRead -= nReadSize;
     298          65 :             nRet += nReadSize;
     299             :         }
     300             :     }
     301           4 :     if( nRet < rData.getLength() )
     302           4 :         rData.realloc( nRet ); // truncate the sequence to the bytes actually copied
     303           4 :     return nRet;
     304             : }
     305             : 
     306           4 : sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead ) // Forwards to readBytes(), passing nMaxBytesToRead as the number of bytes to read.
     307             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
     308             : {
     309           4 :     return readBytes( rData, nMaxBytesToRead );
     310             : }
     311             : 
     312           0 : void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip ) // Advances the read position by up to nBytesToSkip processed bytes; throws IOException for a negative nBytesToSkip.
     313             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
     314             : {
     315           0 :     if( nBytesToSkip < 0 )
     316           0 :         throw IOException();
     317             : 
     318           0 :     while( (nBytesToSkip > 0) && ((mnBufferPos < maBuffer.getLength()) || !mxTextStrm->isEOF()) ) // bytes still buffered can be skipped even after the text stream has reported EOF
     319             :     {
     320           0 :         updateBuffer(); // refills maBuffer only if it is used up and the text stream is not at EOF
     321           0 :         sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
     322           0 :         mnBufferPos += nSkipSize;
     323           0 :         nBytesToSkip -= nSkipSize;
     324             :     }
     325           0 : }
     326             : 
     327           0 : sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException) // Returns the number of bytes in the current processed buffer that have not been consumed yet.
     328             : {
     329           0 :     updateBuffer(); // make sure a fresh chunk is loaded if the previous one is used up
     330           0 :     return maBuffer.getLength() - mnBufferPos;
     331             : }
     332             : 
     333           0 : void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException) // Forwards the close request to the wrapped text input stream.
     334             : {
     335           0 :     mxTextStrm->closeInput();
     336           0 : }
     337             : 
     338             : // private --------------------------------------------------------------------
     339             : 
     340          65 : void InputStream::updateBuffer() throw (IOException, RuntimeException) // Once maBuffer is used up, replaces it with the next processed chunk: character data followed by one XML element (or CDATA section).
     341             : {
     342         195 :     while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() ) // repeats if processing yielded an empty chunk; does nothing while unread bytes remain
     343             :     {
     344             :         // collect new contents in a string buffer
     345          65 :         OStringBuffer aBuffer;
     346             : 
     347             :         // read and process characters until the opening bracket of the next XML element
     348          65 :         OString aChars = readToElementBegin();
     349          65 :         bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars ); // true if aChars ended with '<', which is not copied to aBuffer
     350             : 
     351             :         // read and process characters until (and including) closing bracket (an XML element)
     352             :         OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
     353          65 :         if( bHasOpeningBracket && !mxTextStrm->isEOF() )
     354             :         {
     355             :             // read the element text (add the leading opening bracket manually)
     356          65 :             OString aElement = OString( '<' ) + readToElementEnd();
     357             :             // check for CDATA part, starting with '<![CDATA['
     358          65 :             if( aElement.match( maOpeningCData ) )
     359             :             {
     360             :                 // search the end tag ']]>'
     361           0 :                 while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.match( maClosingCData, aElement.getLength() - maClosingCData.getLength() )) && !mxTextStrm->isEOF() ) // keep reading up to the next '>' until the collected text ends with ']]>' or the stream is at EOF
     362           0 :                     aElement += readToElementEnd();
     363             :                 // copy the entire CDATA part
     364           0 :                 aBuffer.append( aElement );
     365             :             }
     366             :             else
     367             :             {
     368             :                 // no CDATA part - process the contents of the element
     369          65 :                 lclProcessElement( aBuffer, aElement );
     370          65 :             }
     371             :         }
     372             : 
     373          65 :         maBuffer = aBuffer.makeStringAndClear();
     374          65 :         mnBufferPos = 0; // the new chunk is consumed from its beginning
     375          65 :     }
     376          65 : }
     377             : 
     378          65 : OString InputStream::readToElementBegin() throw (IOException, RuntimeException) // Reads text from the stream using the '<' delimiter and returns it as an ISO-8859-1 byte string.
     379             : {
     380          65 :     return OUStringToOString( mxTextStrm->readString( maOpeningBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 ); // NOTE(review): sal_False presumably keeps the delimiter in the result (lclProcessCharacters() checks for a trailing '<') - confirm against XTextInputStream::readString
     381             : }
     382             : 
     383          65 : OString InputStream::readToElementEnd() throw (IOException, RuntimeException) // Reads text from the stream using the '>' delimiter and returns it as an ISO-8859-1 byte string.
     384             : {
     385          65 :     OString aText = OUStringToOString( mxTextStrm->readString( maClosingBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     386             :     OSL_ENSURE( !aText.isEmpty() && (aText[ aText.getLength() - 1 ] == '>'), "InputStream::readToElementEnd - missing closing bracket of XML element" ); // the returned text is expected to end with the '>' delimiter
     387          65 :     return aText;
     388             : }
     389             : 
     390             : // ============================================================================
     391             : 
     392             : } // namespace vml
     393             : } // namespace oox
     394             : 
     395             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10