LCOV - code coverage report
Current view: top level - usr/local/src/libreoffice/sdext/source/pdfimport/pdfparse - pdfparse.cxx (source / functions) Hit Total Coverage
Test: libreoffice_filtered.info Lines: 231 291 79.4 %
Date: 2013-07-09 Functions: 30 39 76.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : 
      21             : #if defined __SUNPRO_CC
      22             : #pragma disable_warn
      23             : #elif defined _MSC_VER
      24             : #pragma warning(push, 1)
      25             : #endif
      26             : 
      27             : #include "pdfparse.hxx"
      28             : 
      29             : // workaround windows compiler: do not include multi_pass.hpp
      30             : #include <boost/spirit/include/classic_core.hpp>
      31             : #include <boost/spirit/include/classic_utility.hpp>
      32             : #include <boost/spirit/include/classic_error_handling.hpp>
      33             : #include <boost/spirit/include/classic_file_iterator.hpp>
      34             : #include <boost/bind.hpp>
      35             : #include <string.h>
      36             : 
      37             : #include <rtl/strbuf.hxx>
      38             : #include <rtl/alloc.h>
      39             : 
      40             : // disable warnings again because someone along the line has enabled them
      41             : #if defined __SUNPRO_CC
      42             : #pragma disable_warn
      43             : #elif defined _MSC_VER
      44             : #pragma warning(push, 1)
      45             : #endif
      46             : 
      47             : using namespace boost::spirit;
      48             : using namespace pdfparse;
      49             : 
      50             : 
      51             : class StringEmitContext : public EmitContext
      52             : {
      53             :     OStringBuffer m_aBuf;
      54             :     public:
      55           0 :     StringEmitContext() : EmitContext(), m_aBuf(256) {}
      56           0 :     virtual ~StringEmitContext() {}
      57           0 :     virtual bool write( const void* pBuf, unsigned int nLen ) throw()
      58             :     {
      59           0 :         m_aBuf.append( (const sal_Char*)pBuf, nLen );
      60           0 :         return true;
      61             :     }
      62           0 :     virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
      63           0 :     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
      64           0 :     { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
      65           0 :              write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
      66           0 :     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
      67             :     {
      68           0 :         if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
      69             :         {
      70           0 :             memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
      71           0 :             return nLen;
      72             :         }
      73           0 :         return 0;
      74             :     }
      75             : 
      76           0 :     OString getString() { return m_aBuf.makeStringAndClear(); }
      77             : };
      78             : 
      79             : template< class iteratorT >
      80             : class PDFGrammar :  public grammar< PDFGrammar<iteratorT> >
      81             : {
      82             : public:
      83             : 
      84           3 :     PDFGrammar( const iteratorT& first )
      85           3 :     : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
      86           3 :     ~PDFGrammar()
      87             :     {
      88           3 :         if( !m_aObjectStack.empty() )
      89           0 :             delete m_aObjectStack.front();
      90           3 :     }
      91             : 
      92             :     double m_fDouble;
      93             :     std::vector< unsigned int > m_aUIntStack;
      94             :     std::vector< PDFEntry* >    m_aObjectStack;
      95             :     OString                m_aErrorString;
      96             :     iteratorT                   m_aGlobalBegin;
      97             : 
      98             : public:
      99             :     struct pdf_string_parser
     100             :     {
     101             :         typedef nil_t result_t;
     102             :         template <typename ScannerT>
     103             :         std::ptrdiff_t
     104           6 :         operator()(ScannerT const& scan, result_t&) const
     105             :         {
     106           6 :             std::ptrdiff_t len = 0;
     107             : 
     108           6 :             int nBraceLevel = 0;
     109         174 :             while( ! scan.at_end() )
     110             :             {
     111         168 :                 char c = *scan;
     112         168 :                 if( c == ')' )
     113             :                 {
     114           6 :                     nBraceLevel--;
     115           6 :                     if( nBraceLevel < 0 )
     116           6 :                         break;
     117             :                 }
     118         162 :                 else if( c == '(' )
     119           0 :                     nBraceLevel++;
     120         162 :                 else if( c == '\\' ) // ignore escaped braces
     121             :                 {
     122           0 :                     ++len;
     123           0 :                     ++scan;
     124           0 :                     if( scan.at_end() )
     125           0 :                         break;
     126             :                 }
     127         162 :                 ++len;
     128         162 :                 ++scan;
     129             :             }
     130           6 :             return scan.at_end() ? -1 : len;
     131             :         }
     132             :     };
     133             : 
     134             :     template< typename ScannerT >
     135           3 :     struct definition
     136             :     {
     137           3 :         definition( const PDFGrammar<iteratorT>& rSelf )
     138           3 :         {
     139           3 :             PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
     140             : 
     141             :             // workaround workshop compiler: comment_p doesn't work
     142             :             // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
     143           3 :             comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
     144             : 
     145           3 :             boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
     146             : 
     147             :             // workaround workshop compiler: confix_p doesn't work
     148             :             //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
     149           3 :             stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
     150             : 
     151           3 :             name        = lexeme_d[
     152             :                             ch_p('/')
     153             :                             >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
     154           3 :                                [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
     155             : 
     156             :             // workaround workshop compiler: confix_p doesn't work
     157             :             //stringtype  = ( confix_p("(",*anychar_p, ")") |
     158             :             //                confix_p("<",*xdigit_p,  ">") )
     159             :             //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
     160             : 
     161           9 :             stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
     162           6 :                             ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
     163          12 :                           [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
     164             : 
     165           3 :             null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
     166             : 
     167             :             #ifdef USE_ASSIGN_ACTOR
     168             :             objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
     169             :                             >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     170             :                             >> ch_p('R')
     171             :                             >> eps_p
     172             :                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
     173             :             #else
     174           9 :             objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     175          12 :                             >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     176          12 :                             >> ch_p('R')
     177           6 :                             >> eps_p
     178          12 :                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
     179             :             #endif
     180             : 
     181             :             #ifdef USE_ASSIGN_ACTOR
     182             :             simple_type = objectref | name |
     183             :                           ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
     184             :                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
     185             :                           | stringtype | boolean | null_object;
     186             :             #else
     187           9 :             simple_type = objectref | name |
     188           6 :                           ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
     189          12 :                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
     190           9 :                           | stringtype | boolean | null_object;
     191             :             #endif
     192             : 
     193           3 :             dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
     194           3 :             dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
     195             : 
     196           3 :             array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
     197           3 :             array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
     198             : 
     199             :             #ifdef USE_ASSIGN_ACTOR
     200             :             object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
     201             :                           >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     202             :                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
     203             :             #else
     204           9 :             object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     205          12 :                           >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     206          12 :                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
     207             :             #endif
     208           3 :             object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
     209             : 
     210           9 :             xref        = str_p( "xref" ) >> uint_p >> uint_p
     211          12 :                           >> lexeme_d[
     212           6 :                                 +( repeat_p(10)[digit_p]
     213           6 :                                    >> blank_p
     214          12 :                                    >> repeat_p(5)[digit_p]
     215           6 :                                    >> blank_p
     216          12 :                                    >> ( ch_p('n') | ch_p('f') )
     217          12 :                                    >> repeat_p(2)[space_p]
     218             :                                  ) ];
     219             : 
     220           9 :             dict_element= dict_begin | comment | simple_type
     221           9 :                           | array_begin | array_end | dict_end;
     222             : 
     223           3 :             object      = object_begin
     224           6 :                           >> *dict_element
     225          12 :                           >> !stream
     226           9 :                           >> object_end;
     227             : 
     228           9 :             trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
     229          12 :                           >> *dict_element
     230          12 :                           >> str_p("startxref")
     231           6 :                           >> uint_p
     232          12 :                           >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
     233             : 
     234             :             #ifdef USE_ASSIGN_ACTOR
     235             :             pdfrule     = ! (lexeme_d[
     236             :                                 str_p( "%PDF-" )
     237             :                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     238             :                                 >> ch_p('.')
     239             :                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     240             :                                 >> *((~ch_p('\r') & ~ch_p('\n')))
     241             :                                 >> eol_p
     242             :                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
     243             :                           >> *( comment | object | ( xref >> trailer ) );
     244             :             #else
     245          15 :             pdfrule     = ! (lexeme_d[
     246             :                                 str_p( "%PDF-" )
     247           6 :                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     248          12 :                                 >> ch_p('.')
     249          12 :                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     250          12 :                                 >> *((~ch_p('\r') & ~ch_p('\n')))
     251           6 :                                 >> eol_p
     252          12 :                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
     253          12 :                           >> *( comment | object | ( xref >> trailer ) );
     254             :             #endif
     255           3 :         }
     256             :         rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
     257             :                          objectref, array, value, dict_element, dict_begin, dict_end,
     258             :                          array_begin, array_end, object, object_begin, object_end,
     259             :                          xref, trailer, pdfrule;
     260             : 
     261           3 :         const rule< ScannerT >& start() const { return pdfrule; }
     262             :     };
     263             : 
     264             :     #ifndef USE_ASSIGN_ACTOR
     265         711 :     void push_back_action_uint( unsigned int i )
     266             :     {
     267         711 :         m_aUIntStack.push_back( i );
     268         711 :     }
     269         291 :     void assign_action_double( double d )
     270             :     {
     271         291 :         m_fDouble = d;
     272         291 :     }
     273             :     #endif
     274             : 
     275           0 :     void parseError( const char* pMessage, iteratorT pLocation )
     276             :     {
     277           0 :         throw_( pLocation, pMessage );
     278           0 :     }
     279             : 
     280         507 :     OString iteratorToString( iteratorT first, iteratorT last ) const
     281             :     {
     282         507 :         OStringBuffer aStr( 32 );
     283        5214 :         while( first != last )
     284             :         {
     285        4200 :             aStr.append( *first );
     286        4200 :             ++first;
     287             :         }
     288         507 :         return aStr.makeStringAndClear();
     289             :     }
     290             : 
     291           3 :     void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
     292             :     {
     293           3 :         if( m_aObjectStack.empty() )
     294             :         {
     295           3 :             PDFFile* pFile = new PDFFile();
     296           3 :             pFile->m_nMinor = m_aUIntStack.back();
     297           3 :             m_aUIntStack.pop_back();
     298           3 :             pFile->m_nMajor = m_aUIntStack.back();
     299           3 :             m_aUIntStack.pop_back();
     300           3 :             m_aObjectStack.push_back( pFile );
     301             :         }
     302             :         else
     303           0 :             parseError( "found file header in unusual place", pBegin );
     304           3 :     }
     305             : 
     306           3 :     void pushComment( iteratorT first, iteratorT last )
     307             :     {
     308             :         // add a comment to the current stack element
     309             :         PDFComment* pComment =
     310           3 :             new PDFComment(iteratorToString(first,last));
     311           3 :         if( m_aObjectStack.empty() )
     312           0 :             m_aObjectStack.push_back( new PDFPart() );
     313           3 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     314           3 :         if( pContainer == NULL )
     315           0 :             parseError( "comment without container", first );
     316           3 :         pContainer->m_aSubElements.push_back( pComment );
     317           3 :     }
     318             : 
     319        1008 :     void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
     320             :     {
     321        1008 :         PDFContainer* pContainer = NULL;
     322        1008 :         const char* pMsg = NULL;
     323        2016 :         if( ! m_aObjectStack.empty() &&
     324        1008 :             (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
     325             :         {
     326        1296 :             if( dynamic_cast<PDFDict*>(pContainer) == NULL      &&
     327         288 :                 dynamic_cast<PDFArray*>(pContainer) == NULL )
     328             :             {
     329          78 :                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
     330          78 :                 if( pObj )
     331             :                 {
     332          75 :                     if( pObj->m_pObject == NULL )
     333          75 :                         pObj->m_pObject = pNewValue;
     334             :                     else
     335             :                     {
     336           0 :                         pMsg = "second value for object";
     337           0 :                         pContainer = NULL;
     338             :                     }
     339             :                 }
     340           3 :                 else if( dynamic_cast<PDFDict*>(pNewValue) )
     341             :                 {
     342           3 :                     PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
     343           3 :                     if( pTrailer )
     344             :                     {
     345           3 :                         if( pTrailer->m_pDict == NULL )
     346           3 :                             pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
     347             :                         else
     348           0 :                             pContainer = NULL;
     349             :                     }
     350             :                     else
     351           0 :                         pContainer = NULL;
     352             :                 }
     353             :                 else
     354           0 :                     pContainer = NULL;
     355             :             }
     356             :         }
     357        1008 :         if( pContainer )
     358        1008 :             pContainer->m_aSubElements.push_back( pNewValue );
     359             :         else
     360             :         {
     361           0 :             if( ! pMsg )
     362             :             {
     363           0 :                 if( dynamic_cast<PDFContainer*>(pNewValue) )
     364           0 :                     pMsg = "array without container";
     365             :                 else
     366           0 :                     pMsg = "value without container";
     367             :             }
     368           0 :             delete pNewValue;
     369           0 :             parseError( pMsg, pPos );
     370             :         }
     371        1008 :     }
     372             : 
     373         486 :     void pushName( iteratorT first, iteratorT last )
     374             :     {
     375         486 :         insertNewValue( new PDFName(iteratorToString(first,last)), first );
     376         486 :     }
     377             : 
     378         291 :     void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
     379             :     {
     380         291 :         insertNewValue( new PDFNumber(m_fDouble), first );
     381         291 :     }
     382             : 
     383          18 :     void pushString( iteratorT first, iteratorT last )
     384             :     {
     385          18 :         insertNewValue( new PDFString(iteratorToString(first,last)), first );
     386          18 :     }
     387             : 
     388           3 :     void pushBool( iteratorT first, iteratorT last )
     389             :     {
     390           3 :         insertNewValue( new PDFBool( (last-first == 4) ), first );
     391           3 :     }
     392             : 
     393           6 :     void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     394             :     {
     395           6 :         insertNewValue( new PDFNull(), first );
     396           6 :     }
     397             : 
     398             : 
     399          75 :     void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
     400             :     {
     401          75 :         if( m_aObjectStack.empty() )
     402           0 :             m_aObjectStack.push_back( new PDFPart() );
     403             : 
     404          75 :         unsigned int nGeneration = m_aUIntStack.back();
     405          75 :         m_aUIntStack.pop_back();
     406          75 :         unsigned int nObject = m_aUIntStack.back();
     407          75 :         m_aUIntStack.pop_back();
     408             : 
     409          75 :         PDFObject* pObj = new PDFObject( nObject, nGeneration );
     410          75 :         pObj->m_nOffset = first - m_aGlobalBegin;
     411             : 
     412          75 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     413         225 :         if( pContainer &&
     414         150 :             ( dynamic_cast<PDFFile*>(pContainer) ||
     415             :               dynamic_cast<PDFPart*>(pContainer) ) )
     416             :         {
     417          75 :             pContainer->m_aSubElements.push_back( pObj );
     418          75 :             m_aObjectStack.push_back( pObj );
     419             :         }
     420             :         else
     421           0 :             parseError( "object in wrong place", first );
     422          75 :     }
     423             : 
     424          75 :     void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     425             :     {
     426          75 :         if( m_aObjectStack.empty() )
     427           0 :             parseError( "endobj without obj", first );
     428          75 :         else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
     429           0 :             parseError( "spurious endobj", first );
     430             :         else
     431          75 :             m_aObjectStack.pop_back();
     432          75 :     }
     433             : 
     434          84 :     void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     435             :     {
     436          84 :         unsigned int nGeneration = m_aUIntStack.back();
     437          84 :         m_aUIntStack.pop_back();
     438          84 :         unsigned int nObject = m_aUIntStack.back();
     439          84 :         m_aUIntStack.pop_back();
     440          84 :         insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
     441          84 :     }
     442             : 
     443          75 :     void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     444             :     {
     445          75 :         PDFDict* pDict = new PDFDict();
     446          75 :         pDict->m_nOffset = first - m_aGlobalBegin;
     447             : 
     448          75 :         insertNewValue( pDict, first );
     449             :         // will not come here if insertion fails (exception)
     450          75 :         m_aObjectStack.push_back( pDict );
     451          75 :     }
     452          75 :     void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     453             :     {
     454          75 :         PDFDict* pDict = NULL;
     455          75 :         if( m_aObjectStack.empty() )
     456           0 :             parseError( "dictionary end without begin", first );
     457          75 :         else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
     458           0 :             parseError( "spurious dictionary end", first );
     459             :         else
     460          75 :             m_aObjectStack.pop_back();
     461             : 
     462          75 :         PDFEntry* pOffender = pDict->buildMap();
     463          75 :         if( pOffender )
     464             :         {
     465           0 :             StringEmitContext aCtx;
     466           0 :             aCtx.write( "offending dictionary element: ", 30 );
     467           0 :             pOffender->emit( aCtx );
     468           0 :             m_aErrorString = aCtx.getString();
     469           0 :             parseError( m_aErrorString.getStr(), first );
     470             :         }
     471          75 :     }
     472             : 
     473          45 :     void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     474             :     {
     475          45 :         PDFArray* pArray = new PDFArray();
     476          45 :         pArray->m_nOffset = first - m_aGlobalBegin;
     477             : 
     478          45 :         insertNewValue( pArray, first );
     479             :         // will not come here if insertion fails (exception)
     480          45 :         m_aObjectStack.push_back( pArray );
     481          45 :     }
     482             : 
     483          45 :     void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     484             :     {
     485          45 :         if( m_aObjectStack.empty() )
     486           0 :             parseError( "array end without begin", first );
     487          45 :         else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
     488           0 :             parseError( "spurious array end", first );
     489             :         else
     490          45 :             m_aObjectStack.pop_back();
     491          45 :     }
     492             : 
     493          24 :     void emitStream( iteratorT first, iteratorT last )
     494             :     {
     495          24 :         if( m_aObjectStack.empty() )
     496           0 :             parseError( "stream without object", first );
     497          24 :         PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
     498          24 :         if( pObj && pObj->m_pObject )
     499             :         {
     500          24 :             if( pObj->m_pStream )
     501           0 :                 parseError( "multiple streams in object", first );
     502             : 
     503          24 :             PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
     504          24 :             if( pDict )
     505             :             {
     506          24 :                 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
     507             : 
     508          24 :                 pObj->m_pStream = pStream;
     509          24 :                 pObj->m_aSubElements.push_back( pStream );
     510          24 :             }
     511             :         }
     512             :         else
     513           0 :             parseError( "stream without object", first );
     514          24 :     }
     515             : 
     516           3 :     void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) // Semantic action: opens a PDFTrailer at position first and pushes it on m_aObjectStack; the second iterator is unused.
     517             :     {
     518           3 :         if( m_aObjectStack.empty() )
     519           0 :             m_aObjectStack.push_back( new PDFPart() ); // an empty stack gets a fresh PDFPart to hold the trailer
     520             : 
     521           3 :         PDFTrailer* pTrailer = new PDFTrailer();
     522           3 :         pTrailer->m_nOffset = first - m_aGlobalBegin; // offset is stored relative to m_aGlobalBegin
     523             : 
     524           3 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     525           9 :         if( pContainer &&
     526           6 :             ( dynamic_cast<PDFFile*>(pContainer) ||
     527             :               dynamic_cast<PDFPart*>(pContainer) ) ) // a trailer is only accepted directly inside a PDFFile or PDFPart
     528             :         {
     529           3 :             pContainer->m_aSubElements.push_back( pTrailer );
     530           3 :             m_aObjectStack.push_back( pTrailer ); // stays on the stack until endTrailer() pops it
     531             :         }
     532             :         else
                     :         {
                     :             delete pTrailer; // nothing references the trailer on this path; release it before reporting the error so it does not leak
     533           0 :             parseError( "trailer in wrong place", first );
                     :         }
     534           3 :     }
     535             : 
     536           3 :     void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT ) // Semantic action: pops the PDFTrailer from the top of m_aObjectStack; the second iterator is unused.
     537             :     {
     538           3 :         if( m_aObjectStack.empty() ) // nothing is open at all
     539           0 :             parseError( "%%EOF without trailer", first );
     540           3 :         else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL ) // the innermost open entry is not a trailer
     541           0 :             parseError( "spurious %%EOF", first );
     542             :         else
     543           3 :             m_aObjectStack.pop_back(); // only the stack entry is removed; the trailer object itself is not deleted here
     544           3 :     }
     545             : };
     546             : 
     547             : #ifdef WIN32
     548             : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen ) // Parses the nLen bytes at pBuffer with PDFGrammar and returns the single resulting entry, or NULL.
     549             : {
     550             :     PDFGrammar<const char*> aGrammar( pBuffer );
     551             : 
     552             :     try
     553             :     {
     554             : #if OSL_DEBUG_LEVEL > 1
     555             :         boost::spirit::parse_info<const char*> aInfo =
     556             : #endif
     557             :             boost::spirit::parse( pBuffer,
     558             :                                   pBuffer+nLen,
     559             :                                   aGrammar,
     560             :                                   boost::spirit::space_p );
     561             : #if OSL_DEBUG_LEVEL > 1
     562             :         SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << static_cast<const void*>(aInfo.stop) << " (buff=" << static_cast<const void*>(pBuffer) << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << (int)aInfo.length ); // pointers are logged as addresses: the buffer is not NUL-terminated, so streaming them as const char* would read past its end
     563             : #endif
     564             :     }
     565             :     catch( const parser_error<const char*, const char*>& rError ) // a parse error is only logged; whatever is on the object stack is still evaluated below
     566             :     {
     567             : #if OSL_DEBUG_LEVEL > 1
     568             :         OUString aTmp;
     569             :         unsigned int nElem = aGrammar.m_aObjectStack.size();
     570             :         for( unsigned int i = 0; i < nElem; i++ )
                     :         {
                     :             const char* pName = typeid( *(aGrammar.m_aObjectStack[i]) ).name(); // same conversion as the file-based overload: explicit length and ASCII encoding
                     :             aTmp += "   ";
     571             :             aTmp += OUString( pName, strlen( pName ), RTL_TEXTENCODING_ASCII_US );
                     :         }
     572             : 
     573             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
     574             : #endif
     575             :     }
     576             : 
     577             :     PDFEntry* pRet = NULL;
     578             :     unsigned int nEntries = aGrammar.m_aObjectStack.size();
     579             :     if( nEntries == 1 ) // exactly one entry left on the stack is the result; it is removed from the grammar's stack
     580             :     {
     581             :         pRet = aGrammar.m_aObjectStack.back();
     582             :         aGrammar.m_aObjectStack.pop_back();
     583             :     }
     584             : #if OSL_DEBUG_LEVEL > 1
     585             :     else if( nEntries > 1 )
     586             :         SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
     587             : #endif
     588             : 
     589             :     return pRet; // NULL when the stack holds zero or more than one entry
     590             : }
     591             : #endif
     592             : 
     593           3 : PDFEntry* PDFReader::read( const char* pFileName ) // Parses the file pFileName with PDFGrammar and returns the single resulting entry, or NULL.
     594             : {
     595             : #ifdef WIN32
     596             :     /* #i106583#
     597             :        since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
     598             :        C++ stdlib istream_iterator does not allow "-" apparently
     599             :        using spirit 2.0 doesn't work in our environment with the MSC
     600             : 
     601             :        So for the time being bite the bullet and read the whole file.
     602             :        FIXME: give Spirit 2.x another try when we upgrade boost again.
     603             :     */
     604             :     PDFEntry* pRet = NULL;
     605             :     FILE* fp = fopen( pFileName, "rb" );
     606             :     if( fp )
     607             :     {
     608             :         const long nSize = ( fseek( fp, 0, SEEK_END ) == 0 ) ? ftell( fp ) : -1L; // -1 on any failure, so it is never cast to a huge unsigned length
     609             :         unsigned int nLen = ( nSize > 0 ) ? static_cast<unsigned int>(nSize) : 0;
     610             :         const bool bRewound = ( fseek( fp, 0, SEEK_SET ) == 0 );
     611             :         char* pBuf = ( nLen && bRewound ) ? (char*)rtl_allocateMemory( nLen ) : NULL; // no buffer (and NULL result) for empty or unseekable files
     612             :         if( pBuf )
     613             :         {
     614             :             nLen = static_cast<unsigned int>( fread( pBuf, 1, nLen, fp ) ); // parse only the bytes actually read, never uninitialised buffer contents
     615             :             pRet = read( pBuf, nLen );
     616             :             rtl_freeMemory( pBuf );
     617             :         }
     618             :         fclose( fp );
     619             :     }
     620             :     return pRet;
     621             : #else
     622           3 :     file_iterator<> file_start( pFileName );
     623           3 :     if( ! file_start ) // invalid iterator - presumably the file could not be opened; confirm against boost file_iterator
     624           0 :         return NULL;
     625           6 :     file_iterator<> file_end = file_start.make_end();
     626           6 :     PDFGrammar< file_iterator<> > aGrammar( file_start );
     627             : 
     628             :     try
     629             :     {
     630             : #if OSL_DEBUG_LEVEL > 1
     631             :         boost::spirit::parse_info< file_iterator<> > aInfo =
     632             : #endif
     633             :             boost::spirit::parse( file_start,
     634             :                                   file_end,
     635             :                                   aGrammar,
     636           3 :                                   boost::spirit::space_p );
     637             : #if OSL_DEBUG_LEVEL > 1
     638             :         SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
     639             : #endif
     640             :     }
     641           0 :     catch( const parser_error< const char*, file_iterator<> >& rError ) // a parse error is only logged; whatever is on the object stack is still evaluated below
     642             :     {
     643             : #if OSL_DEBUG_LEVEL > 1
     644             :         OUString aTmp;
     645             :         unsigned int nElem = aGrammar.m_aObjectStack.size();
     646             :         for( unsigned int i = 0; i < nElem; i++ )
     647             :         {
     648             :             aTmp += "   ";
     649             :             aTmp += OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
     650             :                              strlen(typeid( *(aGrammar.m_aObjectStack[i]) ).name()),
     651             :                              RTL_TEXTENCODING_ASCII_US);
     652             :         }
     653             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start << ", object stack: " << aTmp);
     654             : #endif
     655             :     }
     656             : 
     657           3 :     PDFEntry* pRet = NULL;
     658           3 :     unsigned int nEntries = aGrammar.m_aObjectStack.size();
     659           3 :     if( nEntries == 1 ) // exactly one entry left on the stack is the result; it is removed from the grammar's stack
     660             :     {
     661           3 :         pRet = aGrammar.m_aObjectStack.back();
     662           3 :         aGrammar.m_aObjectStack.pop_back();
     663             :     }
     664             : #if OSL_DEBUG_LEVEL > 1
     665             :     else if( nEntries > 1 ) // debug builds list every leftover stack entry
     666             :     {
     667             :         SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
     668             :         for( unsigned int i = 0; i < nEntries; i++ )
     669             :         {
     670             :             SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
     671             :             PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
     672             :             if( pObj )
     673             :                 SAL_WARN("sdext.pdfimport.pdfparse", "   -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
     674             :             else
     675             :                 SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
     676             :         }
     677             :     }
     678             : #endif
     679           6 :     return pRet; // NULL when the stack holds zero or more than one entry
     680             : #endif // WIN32
     681           3 : }
     682             : 
     683             : #if defined __SUNPRO_CC
     684             : #pragma enable_warn
     685             : #elif defined _MSC_VER
     686             : #pragma warning(pop)
     687             : #endif
     688             : 
     689             : 
     690             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10