LCOV - code coverage report
Current view: top level - usr/local/src/libreoffice/oox/source/vml - vmlinputstream.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 129 152 84.9 %
Date: 2013-07-09 Functions: 17 20 85.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : #include "oox/vml/vmlinputstream.hxx"
      21             : 
      22             : #include <com/sun/star/io/XTextInputStream2.hpp>
      23             : #include <map>
      24             : #include <string.h>
      25             : #include <rtl/strbuf.hxx>
      26             : #include "oox/helper/helper.hxx"
      27             : #include "oox/helper/textinputstream.hxx"
      28             : 
      29             : namespace oox {
      30             : namespace vml {
      31             : 
      32             : // ============================================================================
      33             : 
      34             : using namespace ::com::sun::star::io;
      35             : using namespace ::com::sun::star::uno;
      36             : 
      37             : // ============================================================================
      38             : 
      39             : namespace {
      40             : 
      41           8 : inline const sal_Char* lclFindCharacter( const sal_Char* pcBeg, const sal_Char* pcEnd, sal_Char cChar )
      42             : {
      43           8 :     sal_Int32 nIndex = rtl_str_indexOfChar_WithLength( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ), cChar );
      44           8 :     return (nIndex < 0) ? pcEnd : (pcBeg + nIndex);
      45             : }
      46             : 
      47        1839 : inline bool lclIsWhiteSpace( sal_Char cChar )
      48             : {
      49        1839 :     return cChar < 32;
      50             : }
      51             : 
      52          75 : const sal_Char* lclFindWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      53             : {
      54        1808 :     for( ; pcBeg < pcEnd; ++pcBeg )
      55        1737 :         if( lclIsWhiteSpace( *pcBeg ) )
      56           4 :             return pcBeg;
      57          71 :     return pcEnd;
      58             : }
      59             : 
      60         145 : const sal_Char* lclFindNonWhiteSpace( const sal_Char* pcBeg, const sal_Char* pcEnd )
      61             : {
      62         200 :     for( ; pcBeg < pcEnd; ++pcBeg )
      63          96 :         if( !lclIsWhiteSpace( *pcBeg ) )
      64          41 :             return pcBeg;
      65         104 :     return pcEnd;
      66             : }
      67             : 
      68           4 : const sal_Char* lclTrimWhiteSpaceFromEnd( const sal_Char* pcBeg, const sal_Char* pcEnd )
      69             : {
      70           8 :     while( (pcBeg < pcEnd) && lclIsWhiteSpace( pcEnd[ -1 ] ) )
      71           0 :         --pcEnd;
      72           4 :     return pcEnd;
      73             : }
      74             : 
      75          76 : inline void lclAppendToBuffer( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      76             : {
      77          76 :     rBuffer.append( pcBeg, static_cast< sal_Int32 >( pcEnd - pcBeg ) );
      78          76 : }
      79             : 
      80             : // ----------------------------------------------------------------------------
      81             : 
      82           3 : void lclProcessAttribs( OStringBuffer& rBuffer, const sal_Char* pcBeg, const sal_Char* pcEnd )
      83             : {
      84             :     /*  Map attribute names to char-pointer of all attributes. This map is used
      85             :         to find multiple occurrences of attributes with the same name. The
      86             :         mapped pointers are used as map key in the next map below. */
      87             :     typedef ::std::map< OString, const sal_Char* > AttributeNameMap;
      88           3 :     AttributeNameMap aAttributeNames;
      89             : 
      90             :     /*  Map the char-pointers of all attributes to the full attribute definition
      91             :         string. This preserves the original order of the used attributes. */
      92             :     typedef ::std::map< const sal_Char*, OString > AttributeDataMap;
      93           6 :     AttributeDataMap aAttributes;
      94             : 
      95           3 :     bool bOk = true;
      96           3 :     const sal_Char* pcNameBeg = pcBeg;
      97          10 :     while( bOk && (pcNameBeg < pcEnd) )
      98             :     {
      99             :         // pcNameBeg points to begin of attribute name, find equality sign
     100           4 :         const sal_Char* pcEqualSign = lclFindCharacter( pcNameBeg, pcEnd, '=' );
     101           4 :         if( (bOk = pcEqualSign < pcEnd) == true )
     102             :         {
     103             :             // find end of attribute name (ignore whitespace between name and equality sign)
     104           4 :             const sal_Char* pcNameEnd = lclTrimWhiteSpaceFromEnd( pcNameBeg, pcEqualSign );
     105           4 :             if( (bOk = pcNameBeg < pcNameEnd) == true )
     106             :             {
     107             :                 // find begin of attribute value (must be single or double quote)
     108           4 :                 const sal_Char* pcValueBeg = lclFindNonWhiteSpace( pcEqualSign + 1, pcEnd );
     109           4 :                 if( (bOk = (pcValueBeg < pcEnd) && ((*pcValueBeg == '\'') || (*pcValueBeg == '"'))) == true )
     110             :                 {
     111             :                     // find end of attribute value (matching quote character)
     112           4 :                     const sal_Char* pcValueEnd = lclFindCharacter( pcValueBeg + 1, pcEnd, *pcValueBeg );
     113           4 :                     if( (bOk = pcValueEnd < pcEnd) == true )
     114             :                     {
     115           4 :                         ++pcValueEnd;
     116           4 :                         OString aAttribName( pcNameBeg, static_cast< sal_Int32 >( pcNameEnd - pcNameBeg ) );
     117           8 :                         OString aAttribData( pcNameBeg, static_cast< sal_Int32 >( pcValueEnd - pcNameBeg ) );
     118             :                         // search for an existing attribute with the same name
     119           4 :                         AttributeNameMap::iterator aIt = aAttributeNames.find( aAttribName );
     120             :                         // remove its definition from the data map
     121           4 :                         if( aIt != aAttributeNames.end() )
     122           0 :                             aAttributes.erase( aIt->second );
     123             :                         // insert the attribute into both maps
     124           4 :                         aAttributeNames[ aAttribName ] = pcNameBeg;
     125           4 :                         aAttributes[ pcNameBeg ] = aAttribData;
     126             :                         // continue with next attribute (skip whitespace after this attribute)
     127           4 :                         pcNameBeg = pcValueEnd;
     128           4 :                         if( (pcNameBeg < pcEnd) && ((bOk = lclIsWhiteSpace( *pcNameBeg )) == true) )
     129           5 :                             pcNameBeg = lclFindNonWhiteSpace( pcNameBeg + 1, pcEnd );
     130             :                     }
     131             :                 }
     132             :             }
     133             :         }
     134             :     }
     135             : 
     136             :     // if no error has occurred, build the resulting attribute list
     137           3 :     if( bOk )
     138           5 :         for( AttributeDataMap::iterator aIt = aAttributes.begin(), aEnd = aAttributes.end(); aIt != aEnd; ++aIt )
     139           3 :             rBuffer.append( ' ' ).append( aIt->second );
     140             :     // on error, just append the complete passed string
     141             :     else
     142           4 :         lclAppendToBuffer( rBuffer, pcBeg, pcEnd );
     143           3 : }
     144             : 
     145          65 : void lclProcessElement( OStringBuffer& rBuffer, const OString& rElement )
     146             : {
     147             :     // check that passed string starts and ends with the brackets of an XML element
     148          65 :     sal_Int32 nElementLen = rElement.getLength();
     149          65 :     if( nElementLen == 0 )
     150          65 :         return;
     151             : 
     152          65 :     const sal_Char* pcOpen = rElement.getStr();
     153          65 :     const sal_Char* pcClose = pcOpen + nElementLen - 1;
     154             : 
     155             :     // no complete element found
     156          65 :     if( (pcOpen >= pcClose) || (*pcOpen != '<') || (*pcClose != '>') )
     157             :     {
     158             :         // just append all passed characters
     159           0 :         rBuffer.append( rElement );
     160             :     }
     161             : 
     162             :     // skip parser instructions: '<![...]>'
     163          65 :     else if( (nElementLen >= 5) && (pcOpen[ 1 ] == '!') && (pcOpen[ 2 ] == '[') && (pcClose[ -1 ] == ']') )
     164             :     {
     165             :         // do nothing
     166             :     }
     167             : 
     168             :     // replace '<br>' element with newline
     169          65 :     else if( (nElementLen >= 4) && (pcOpen[ 1 ] == 'b') && (pcOpen[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen + 3, pcClose ) == pcClose) )
     170             :     {
     171           0 :         rBuffer.append( '\n' );
     172             :     }
     173             : 
     174             :     // check start elements and simple elements for repeated attributes
     175          65 :     else if( pcOpen[ 1 ] != '/' )
     176             :     {
     177             :         // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
     178          42 :         const sal_Char* pcContentBeg = pcOpen + 1;
     179          42 :         bool bIsEmptyElement = pcClose[ -1 ] == '/';
     180          42 :         const sal_Char* pcContentEnd = bIsEmptyElement ? (pcClose - 1) : pcClose;
     181             :         // append opening bracket and element name to buffer
     182          42 :         const sal_Char* pcWhiteSpace = lclFindWhiteSpace( pcContentBeg, pcContentEnd );
     183          42 :         lclAppendToBuffer( rBuffer, pcOpen, pcWhiteSpace );
     184             :         // find begin of attributes, and process all attributes
     185          42 :         const sal_Char* pcAttribBeg = lclFindNonWhiteSpace( pcWhiteSpace, pcContentEnd );
     186          42 :         if( pcAttribBeg < pcContentEnd )
     187           3 :             lclProcessAttribs( rBuffer, pcAttribBeg, pcContentEnd );
     188             :         // close the element
     189          42 :         if( bIsEmptyElement )
     190          19 :             rBuffer.append( '/' );
     191          42 :         rBuffer.append( '>' );
     192             :     }
     193             : 
     194             :     // append end elements without further processing
     195             :     else
     196             :     {
     197          23 :         rBuffer.append( rElement );
     198             :     }
     199             : }
     200             : 
     201          65 : bool lclProcessCharacters( OStringBuffer& rBuffer, const OString& rChars )
     202             : {
     203             :     /*  MSO has a very weird way to store and handle whitespaces. The stream
     204             :         may contain lots of spaces, tabs, and newlines which have to be handled
     205             :         as single space character. This will be done in this function.
     206             : 
     207             :         If the element text contains a literal line break, it will be stored as
     208             :         <br> tag (without matching </br> element). This input stream wrapper
     209             :         will replace this element with a literal LF character (see below).
     210             : 
     211             :         A single space character for its own is stored as is. Example: The
     212             :         element
     213             :             <font> </font>
     214             :         represents a single space character. The XML parser will ignore this
     215             :         space character completely without issuing a 'characters' event. The
     216             :         VML import filter implementation has to react on this case manually.
     217             : 
     218             :         A single space character following another character is stored
     219             :         literally and must not be stipped away here. Example: The element
     220             :             <font>abc </font>
     221             :         contains the three letters a, b, and c, followed by a space character.
     222             : 
     223             :         Consecutive space characters, or a leading single space character, are
     224             :         stored in a <span> element. If there are N space characters (N > 1),
     225             :         then the <span> element contains exactly (N-1) NBSP (non-breaking
     226             :         space) characters, followed by a regular space character. Examples:
     227             :         The element
     228             :             <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
     229             :         represents 4 consecutive space characters. Has to be handled by the
     230             :         implementation. The element
     231             :             <font><span style='mso-spacerun:yes'> abc</span></font>
     232             :         represents a space characters followed by the letters a, b, c. These
     233             :         strings have to be handled by the VML import filter implementation.
     234             :      */
     235             : 
     236             :     // passed string ends with the leading opening bracket of an XML element
     237          65 :     const sal_Char* pcBeg = rChars.getStr();
     238          65 :     const sal_Char* pcEnd = pcBeg + rChars.getLength();
     239          65 :     bool bHasBracket = (pcBeg < pcEnd) && (pcEnd[ -1 ] == '<');
     240          65 :     if( bHasBracket ) --pcEnd;
     241             : 
     242             :     // skip leading whitespace
     243          65 :     const sal_Char* pcContentsBeg = lclFindNonWhiteSpace( pcBeg, pcEnd );
     244         163 :     while( pcContentsBeg < pcEnd )
     245             :     {
     246          33 :         const sal_Char* pcWhitespaceBeg = lclFindWhiteSpace( pcContentsBeg + 1, pcEnd );
     247          33 :         lclAppendToBuffer( rBuffer, pcContentsBeg, pcWhitespaceBeg );
     248          33 :         if( pcWhitespaceBeg < pcEnd )
     249           1 :             rBuffer.append( ' ' );
     250          33 :         pcContentsBeg = lclFindNonWhiteSpace( pcWhitespaceBeg, pcEnd );
     251             :     }
     252             : 
     253          65 :     return bHasBracket;
     254             : }
     255             : 
     256             : } // namespace
     257             : 
     258             : // ============================================================================
     259             : 
     260           2 : InputStream::InputStream( const Reference< XComponentContext >& rxContext, const Reference< XInputStream >& rxInStrm ) :
     261             :     // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
     262             :     mxTextStrm( TextInputStream::createXTextInputStream( rxContext, rxInStrm, RTL_TEXTENCODING_ISO_8859_1 ) ),
     263             :     maOpeningBracket( 1 ),
     264             :     maClosingBracket( 1 ),
     265             :     maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
     266             :     maClosingCData( CREATE_OSTRING( "]]>" ) ),
     267           2 :     mnBufferPos( 0 )
     268             : {
     269           2 :     maOpeningBracket[ 0 ] = '<';
     270           2 :     maClosingBracket[ 0 ] = '>';
     271           2 : }
     272             : 
     273           4 : InputStream::~InputStream()
     274             : {
     275           4 : }
     276             : 
     277           4 : sal_Int32 SAL_CALL InputStream::readBytes( Sequence< sal_Int8 >& rData, sal_Int32 nBytesToRead )
     278             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
     279             : {
     280           4 :     if( nBytesToRead < 0 )
     281           0 :         throw IOException();
     282             : 
     283           4 :     rData.realloc( nBytesToRead );
     284           4 :     sal_Int8* pcDest = rData.getArray();
     285           4 :     sal_Int32 nRet = 0;
     286          73 :     while( (nBytesToRead > 0) && !mxTextStrm->isEOF() )
     287             :     {
     288          65 :         updateBuffer();
     289          65 :         sal_Int32 nReadSize = ::std::min( nBytesToRead, maBuffer.getLength() - mnBufferPos );
     290          65 :         if( nReadSize > 0 )
     291             :         {
     292          65 :             memcpy( pcDest + nRet, maBuffer.getStr() + mnBufferPos, static_cast< size_t >( nReadSize ) );
     293          65 :             mnBufferPos += nReadSize;
     294          65 :             nBytesToRead -= nReadSize;
     295          65 :             nRet += nReadSize;
     296             :         }
     297             :     }
     298           4 :     if( nRet < rData.getLength() )
     299           4 :         rData.realloc( nRet );
     300           4 :     return nRet;
     301             : }
     302             : 
     303           4 : sal_Int32 SAL_CALL InputStream::readSomeBytes( Sequence< sal_Int8 >& rData, sal_Int32 nMaxBytesToRead )
     304             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
     305             : {
     306           4 :     return readBytes( rData, nMaxBytesToRead );
     307             : }
     308             : 
     309           0 : void SAL_CALL InputStream::skipBytes( sal_Int32 nBytesToSkip )
     310             :         throw (NotConnectedException, BufferSizeExceededException, IOException, RuntimeException)
     311             : {
     312           0 :     if( nBytesToSkip < 0 )
     313           0 :         throw IOException();
     314             : 
     315           0 :     while( (nBytesToSkip > 0) && !mxTextStrm->isEOF() )
     316             :     {
     317           0 :         updateBuffer();
     318           0 :         sal_Int32 nSkipSize = ::std::min( nBytesToSkip, maBuffer.getLength() - mnBufferPos );
     319           0 :         mnBufferPos += nSkipSize;
     320           0 :         nBytesToSkip -= nSkipSize;
     321             :     }
     322           0 : }
     323             : 
     324           0 : sal_Int32 SAL_CALL InputStream::available() throw (NotConnectedException, IOException, RuntimeException)
     325             : {
     326           0 :     updateBuffer();
     327           0 :     return maBuffer.getLength() - mnBufferPos;
     328             : }
     329             : 
     330           0 : void SAL_CALL InputStream::closeInput() throw (NotConnectedException, IOException, RuntimeException)
     331             : {
     332           0 :     mxTextStrm->closeInput();
     333           0 : }
     334             : 
     335             : // private --------------------------------------------------------------------
     336             : 
     337          65 : void InputStream::updateBuffer() throw (IOException, RuntimeException)
     338             : {
     339         195 :     while( (mnBufferPos >= maBuffer.getLength()) && !mxTextStrm->isEOF() )
     340             :     {
     341             :         // collect new contents in a string buffer
     342          65 :         OStringBuffer aBuffer;
     343             : 
     344             :         // read and process characters until the opening bracket of the next XML element
     345         130 :         OString aChars = readToElementBegin();
     346          65 :         bool bHasOpeningBracket = lclProcessCharacters( aBuffer, aChars );
     347             : 
     348             :         // read and process characters until (and including) closing bracket (an XML element)
     349             :         OSL_ENSURE( bHasOpeningBracket || mxTextStrm->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
     350          65 :         if( bHasOpeningBracket && !mxTextStrm->isEOF() )
     351             :         {
     352             :             // read the element text (add the leading opening bracket manually)
     353          65 :             OString aElement = OString( '<' ) + readToElementEnd();
     354             :             // check for CDATA part, starting with '<![CDATA['
     355          65 :             if( aElement.match( maOpeningCData ) )
     356             :             {
     357             :                 // search the end tag ']]>'
     358           0 :                 while( ((aElement.getLength() < maClosingCData.getLength()) || !aElement.match( maClosingCData, aElement.getLength() - maClosingCData.getLength() )) && !mxTextStrm->isEOF() )
     359           0 :                     aElement += readToElementEnd();
     360             :                 // copy the entire CDATA part
     361           0 :                 aBuffer.append( aElement );
     362             :             }
     363             :             else
     364             :             {
     365             :                 // no CDATA part - process the contents of the element
     366          65 :                 lclProcessElement( aBuffer, aElement );
     367          65 :             }
     368             :         }
     369             : 
     370          65 :         maBuffer = aBuffer.makeStringAndClear();
     371          65 :         mnBufferPos = 0;
     372          65 :     }
     373          65 : }
     374             : 
     375          65 : OString InputStream::readToElementBegin() throw (IOException, RuntimeException)
     376             : {
     377          65 :     return OUStringToOString( mxTextStrm->readString( maOpeningBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     378             : }
     379             : 
     380          65 : OString InputStream::readToElementEnd() throw (IOException, RuntimeException)
     381             : {
     382          65 :     OString aText = OUStringToOString( mxTextStrm->readString( maClosingBracket, sal_False ), RTL_TEXTENCODING_ISO_8859_1 );
     383             :     OSL_ENSURE( !aText.isEmpty() && (aText[ aText.getLength() - 1 ] == '>'), "InputStream::readToElementEnd - missing closing bracket of XML element" );
     384          65 :     return aText;
     385             : }
     386             : 
     387             : // ============================================================================
     388             : 
     389             : } // namespace vml
     390             : } // namespace oox
     391             : 
     392             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10