LCOV - code coverage report
Current view: top level - sdext/source/pdfimport/pdfparse - pdfparse.cxx (source / functions) Hit Total Coverage
Test: commit 0e63ca4fde4e446f346e35849c756a30ca294aab Lines: 231 291 79.4 %
Date: 2014-04-11 Functions: 30 39 76.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : 
      21             : #if defined _MSC_VER
      22             : #pragma warning(push, 1)
      23             : #endif
      24             : 
      25             : #include "pdfparse.hxx"
      26             : 
      27             : // workaround windows compiler: do not include multi_pass.hpp
      28             : #include <boost/spirit/include/classic_core.hpp>
      29             : #include <boost/spirit/include/classic_utility.hpp>
      30             : #include <boost/spirit/include/classic_error_handling.hpp>
      31             : #include <boost/spirit/include/classic_file_iterator.hpp>
      32             : #include <boost/bind.hpp>
      33             : #include <string.h>
      34             : 
      35             : #include <rtl/strbuf.hxx>
      36             : #include <rtl/alloc.h>
      37             : 
      38             : // disable warnings again because someone along the line has enabled them
      39             : #if defined _MSC_VER
      40             : #pragma warning(push, 1)
      41             : #endif
      42             : 
      43             : using namespace boost::spirit;
      44             : using namespace pdfparse;
      45             : 
      46             : 
      47             : class StringEmitContext : public EmitContext
      48             : {
      49             :     OStringBuffer m_aBuf;
      50             :     public:
      51           0 :     StringEmitContext() : EmitContext(), m_aBuf(256) {}
      52           0 :     virtual ~StringEmitContext() {}
      53           0 :     virtual bool write( const void* pBuf, unsigned int nLen ) throw() SAL_OVERRIDE
      54             :     {
      55           0 :         m_aBuf.append( (const sal_Char*)pBuf, nLen );
      56           0 :         return true;
      57             :     }
      58           0 :     virtual unsigned int getCurPos() throw() SAL_OVERRIDE { return m_aBuf.getLength(); }
      59           0 :     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() SAL_OVERRIDE
      60           0 :     { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
      61           0 :              write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
      62           0 :     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() SAL_OVERRIDE
      63             :     {
      64           0 :         if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
      65             :         {
      66           0 :             memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
      67           0 :             return nLen;
      68             :         }
      69           0 :         return 0;
      70             :     }
      71             : 
      72           0 :     OString getString() { return m_aBuf.makeStringAndClear(); }
      73             : };
      74             : 
      75             : template< class iteratorT >
      76             : class PDFGrammar :  public grammar< PDFGrammar<iteratorT> >
      77             : {
      78             : public:
      79             : 
      80           3 :     PDFGrammar( const iteratorT& first )
      81           3 :     : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
      82           3 :     ~PDFGrammar()
      83             :     {
      84           3 :         if( !m_aObjectStack.empty() )
      85           0 :             delete m_aObjectStack.front();
      86           3 :     }
      87             : 
      88             :     double m_fDouble;
      89             :     std::vector< unsigned int > m_aUIntStack;
      90             :     std::vector< PDFEntry* >    m_aObjectStack;
      91             :     OString                m_aErrorString;
      92             :     iteratorT                   m_aGlobalBegin;
      93             : 
      94             : public:
      95             :     struct pdf_string_parser
      96             :     {
      97             :         typedef nil_t result_t;
      98             :         template <typename ScannerT>
      99             :         std::ptrdiff_t
     100           6 :         operator()(ScannerT const& scan, result_t&) const
     101             :         {
     102           6 :             std::ptrdiff_t len = 0;
     103             : 
     104           6 :             int nBraceLevel = 0;
     105         174 :             while( ! scan.at_end() )
     106             :             {
     107         168 :                 char c = *scan;
     108         168 :                 if( c == ')' )
     109             :                 {
     110           6 :                     nBraceLevel--;
     111           6 :                     if( nBraceLevel < 0 )
     112           6 :                         break;
     113             :                 }
     114         162 :                 else if( c == '(' )
     115           0 :                     nBraceLevel++;
     116         162 :                 else if( c == '\\' ) // ignore escaped braces
     117             :                 {
     118           0 :                     ++len;
     119           0 :                     ++scan;
     120           0 :                     if( scan.at_end() )
     121           0 :                         break;
     122             :                 }
     123         162 :                 ++len;
     124         162 :                 ++scan;
     125             :             }
     126           6 :             return scan.at_end() ? -1 : len;
     127             :         }
     128             :     };
     129             : 
                           // Grammar definition for a given scanner type. The constructor
                           // assigns every rule and binds the semantic actions to member
                           // functions of the enclosing PDFGrammar; start() names pdfrule as
                           // the entry rule.
     130             :     template< typename ScannerT >
     131           3 :     struct definition
     132             :     {
     133           3 :         definition( const PDFGrammar<iteratorT>& rSelf )
     134           3 :         {
                                   // The grammar is passed in as const, but the bound actions
                                   // are non-const members (they modify the stacks), so the
                                   // constness is cast away here.
     135           3 :             PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
     136             : 
     137             :             // workaround workshop compiler: comment_p doesn't work
     138             :             // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
                                   // '%' followed by anything up to and including the end of line
     139           3 :             comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
     140             : 
     141           3 :             boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
     142             : 
     143             :             // workaround workshop compiler: confix_p doesn't work
     144             :             //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
                                   // "stream", then any characters up to the first "endstream"
     145           3 :             stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
     146             : 
                                   // '/' followed by characters that are neither whitespace,
                                   // one of the listed delimiters, nor NUL; only the part after
                                   // the '/' is passed to pushName
     147           3 :             name        = lexeme_d[
     148             :                             ch_p('/')
     149             :                             >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
     150           3 :                                [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
     151             : 
     152             :             // workaround workshop compiler: confix_p doesn't work
     153             :             //stringtype  = ( confix_p("(",*anychar_p, ")") |
     154             :             //                confix_p("<",*xdigit_p,  ">") )
     155             :             //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
     156             : 
                                   // either "( ... )" with the body scanned by pdf_string_parser,
                                   // or "< hex digits >"; the delimiters are part of the range
                                   // handed to pushString
     157           9 :             stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
     158           6 :                             ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
     159          12 :                           [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
     160             : 
     161           3 :             null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
     162             : 
                                   // two unsigned integers followed by 'R'; both numbers are
                                   // pushed onto m_aUIntStack and popped again by pushObjectRef
     163             :             #ifdef USE_ASSIGN_ACTOR
     164             :             objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
     165             :                             >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     166             :                             >> ch_p('R')
     167             :                             >> eps_p
     168             :                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
     169             :             #else
     170           9 :             objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     171          12 :                             >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     172          12 :                             >> ch_p('R')
     173           6 :                             >> eps_p
     174          12 :                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
     175             :             #endif
     176             : 
                                   // objectref is listed before the real number alternative;
                                   // the number is stored in m_fDouble and read by pushDouble
     177             :             #ifdef USE_ASSIGN_ACTOR
     178             :             simple_type = objectref | name |
     179             :                           ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
     180             :                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
     181             :                           | stringtype | boolean | null_object;
     182             :             #else
     183           9 :             simple_type = objectref | name |
     184           6 :                           ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
     185          12 :                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
     186           9 :                           | stringtype | boolean | null_object;
     187             :             #endif
     188             : 
     189           3 :             dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
     190           3 :             dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
     191             : 
     192           3 :             array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
     193           3 :             array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
     194             : 
                                   // object number, generation number, "obj"; beginObject pops
                                   // the two numbers from m_aUIntStack
     195             :             #ifdef USE_ASSIGN_ACTOR
     196             :             object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
     197             :                           >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     198             :                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
     199             :             #else
     200           9 :             object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     201          12 :                           >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     202          12 :                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
     203             :             #endif
     204           3 :             object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
     205             : 
                                   // "xref", two unsigned integers, then one or more entries of
                                   // the form: 10 digits, blank, 5 digits, blank, 'n' or 'f',
                                   // two whitespace characters. No semantic action is attached.
     206           9 :             xref        = str_p( "xref" ) >> uint_p >> uint_p
     207          12 :                           >> lexeme_d[
     208           6 :                                 +( repeat_p(10)[digit_p]
     209           6 :                                    >> blank_p
     210          12 :                                    >> repeat_p(5)[digit_p]
     211           6 :                                    >> blank_p
     212          12 :                                    >> ( ch_p('n') | ch_p('f') )
     213          12 :                                    >> repeat_p(2)[space_p]
     214             :                                  ) ];
     215             : 
                                   // Begin and end tokens of dictionaries and arrays are plain
                                   // alternatives here; their nesting is tracked by the semantic
                                   // actions through m_aObjectStack, not by the grammar.
     216           9 :             dict_element= dict_begin | comment | simple_type
     217           9 :                           | array_begin | array_end | dict_end;
     218             : 
                                   // object header, any number of elements, an optional stream,
                                   // "endobj"
     219           3 :             object      = object_begin
     220           6 :                           >> *dict_element
     221          12 :                           >> !stream
     222           9 :                           >> object_end;
     223             : 
     224           9 :             trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
     225          12 :                           >> *dict_element
     226          12 :                           >> str_p("startxref")
     227           6 :                           >> uint_p
     228          12 :                           >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
     229             : 
                                   // optional "%PDF-<major>.<minor>" header line (the two
                                   // numbers are popped by haveFile), followed by any sequence
                                   // of comments, objects and xref/trailer pairs
     230             :             #ifdef USE_ASSIGN_ACTOR
     231             :             pdfrule     = ! (lexeme_d[
     232             :                                 str_p( "%PDF-" )
     233             :                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     234             :                                 >> ch_p('.')
     235             :                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     236             :                                 >> *((~ch_p('\r') & ~ch_p('\n')))
     237             :                                 >> eol_p
     238             :                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
     239             :                           >> *( comment | object | ( xref >> trailer ) );
     240             :             #else
     241          15 :             pdfrule     = ! (lexeme_d[
     242             :                                 str_p( "%PDF-" )
     243           6 :                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     244          12 :                                 >> ch_p('.')
     245          12 :                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     246          12 :                                 >> *((~ch_p('\r') & ~ch_p('\n')))
     247           6 :                                 >> eol_p
     248          12 :                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
     249          12 :                           >> *( comment | object | ( xref >> trailer ) );
     250             :             #endif
     251           3 :         }
                               // NOTE(review): 'array' and 'value' are declared below but never
                               // assigned or referenced in the constructor above.
     252             :         rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
     253             :                          objectref, array, value, dict_element, dict_begin, dict_end,
     254             :                          array_begin, array_end, object, object_begin, object_end,
     255             :                          xref, trailer, pdfrule;
     256             : 
                               // entry rule of the grammar
     257           3 :         const rule< ScannerT >& start() const { return pdfrule; }
     258             :     };
     259             : 
     260             :     #ifndef USE_ASSIGN_ACTOR
     261         711 :     void push_back_action_uint( unsigned int i )
     262             :     {
     263         711 :         m_aUIntStack.push_back( i );
     264         711 :     }
     265         291 :     void assign_action_double( double d )
     266             :     {
     267         291 :         m_fDouble = d;
     268         291 :     }
     269             :     #endif
     270             : 
     271           0 :     void parseError( const char* pMessage, iteratorT pLocation )
     272             :     {
     273           0 :         throw_( pLocation, pMessage );
     274           0 :     }
     275             : 
     276         507 :     OString iteratorToString( iteratorT first, iteratorT last ) const
     277             :     {
     278         507 :         OStringBuffer aStr( 32 );
     279        5214 :         while( first != last )
     280             :         {
     281        4200 :             aStr.append( *first );
     282        4200 :             ++first;
     283             :         }
     284         507 :         return aStr.makeStringAndClear();
     285             :     }
     286             : 
     287           3 :     void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
     288             :     {
     289           3 :         if( m_aObjectStack.empty() )
     290             :         {
     291           3 :             PDFFile* pFile = new PDFFile();
     292           3 :             pFile->m_nMinor = m_aUIntStack.back();
     293           3 :             m_aUIntStack.pop_back();
     294           3 :             pFile->m_nMajor = m_aUIntStack.back();
     295           3 :             m_aUIntStack.pop_back();
     296           3 :             m_aObjectStack.push_back( pFile );
     297             :         }
     298             :         else
     299           0 :             parseError( "found file header in unusual place", pBegin );
     300           3 :     }
     301             : 
     302           3 :     void pushComment( iteratorT first, iteratorT last )
     303             :     {
     304             :         // add a comment to the current stack element
     305             :         PDFComment* pComment =
     306           3 :             new PDFComment(iteratorToString(first,last));
     307           3 :         if( m_aObjectStack.empty() )
     308           0 :             m_aObjectStack.push_back( new PDFPart() );
     309           3 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     310           3 :         if( pContainer == NULL )
     311           0 :             parseError( "comment without container", first );
     312           3 :         pContainer->m_aSubElements.push_back( pComment );
     313           3 :     }
     314             : 
     315        1008 :     void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
     316             :     {
     317        1008 :         PDFContainer* pContainer = NULL;
     318        1008 :         const char* pMsg = NULL;
     319        2016 :         if( ! m_aObjectStack.empty() &&
     320        1008 :             (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
     321             :         {
     322        1296 :             if( dynamic_cast<PDFDict*>(pContainer) == NULL      &&
     323         288 :                 dynamic_cast<PDFArray*>(pContainer) == NULL )
     324             :             {
     325          78 :                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
     326          78 :                 if( pObj )
     327             :                 {
     328          75 :                     if( pObj->m_pObject == NULL )
     329          75 :                         pObj->m_pObject = pNewValue;
     330             :                     else
     331             :                     {
     332           0 :                         pMsg = "second value for object";
     333           0 :                         pContainer = NULL;
     334             :                     }
     335             :                 }
     336           3 :                 else if( dynamic_cast<PDFDict*>(pNewValue) )
     337             :                 {
     338           3 :                     PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
     339           3 :                     if( pTrailer )
     340             :                     {
     341           3 :                         if( pTrailer->m_pDict == NULL )
     342           3 :                             pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
     343             :                         else
     344           0 :                             pContainer = NULL;
     345             :                     }
     346             :                     else
     347           0 :                         pContainer = NULL;
     348             :                 }
     349             :                 else
     350           0 :                     pContainer = NULL;
     351             :             }
     352             :         }
     353        1008 :         if( pContainer )
     354        1008 :             pContainer->m_aSubElements.push_back( pNewValue );
     355             :         else
     356             :         {
     357           0 :             if( ! pMsg )
     358             :             {
     359           0 :                 if( dynamic_cast<PDFContainer*>(pNewValue) )
     360           0 :                     pMsg = "array without container";
     361             :                 else
     362           0 :                     pMsg = "value without container";
     363             :             }
     364           0 :             delete pNewValue;
     365           0 :             parseError( pMsg, pPos );
     366             :         }
     367        1008 :     }
     368             : 
     369         486 :     void pushName( iteratorT first, iteratorT last )
     370             :     {
     371         486 :         insertNewValue( new PDFName(iteratorToString(first,last)), first );
     372         486 :     }
     373             : 
     374         291 :     void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
     375             :     {
     376         291 :         insertNewValue( new PDFNumber(m_fDouble), first );
     377         291 :     }
     378             : 
     379          18 :     void pushString( iteratorT first, iteratorT last )
     380             :     {
     381          18 :         insertNewValue( new PDFString(iteratorToString(first,last)), first );
     382          18 :     }
     383             : 
     384           3 :     void pushBool( iteratorT first, iteratorT last )
     385             :     {
     386           3 :         insertNewValue( new PDFBool( (last-first == 4) ), first );
     387           3 :     }
     388             : 
     389           6 :     void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     390             :     {
     391           6 :         insertNewValue( new PDFNull(), first );
     392           6 :     }
     393             : 
     394             : 
     395          75 :     void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
     396             :     {
     397          75 :         if( m_aObjectStack.empty() )
     398           0 :             m_aObjectStack.push_back( new PDFPart() );
     399             : 
     400          75 :         unsigned int nGeneration = m_aUIntStack.back();
     401          75 :         m_aUIntStack.pop_back();
     402          75 :         unsigned int nObject = m_aUIntStack.back();
     403          75 :         m_aUIntStack.pop_back();
     404             : 
     405          75 :         PDFObject* pObj = new PDFObject( nObject, nGeneration );
     406          75 :         pObj->m_nOffset = first - m_aGlobalBegin;
     407             : 
     408          75 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     409         225 :         if( pContainer &&
     410         150 :             ( dynamic_cast<PDFFile*>(pContainer) ||
     411             :               dynamic_cast<PDFPart*>(pContainer) ) )
     412             :         {
     413          75 :             pContainer->m_aSubElements.push_back( pObj );
     414          75 :             m_aObjectStack.push_back( pObj );
     415             :         }
     416             :         else
     417           0 :             parseError( "object in wrong place", first );
     418          75 :     }
     419             : 
     420          75 :     void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     421             :     {
     422          75 :         if( m_aObjectStack.empty() )
     423           0 :             parseError( "endobj without obj", first );
     424          75 :         else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
     425           0 :             parseError( "spurious endobj", first );
     426             :         else
     427          75 :             m_aObjectStack.pop_back();
     428          75 :     }
     429             : 
     430          84 :     void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     431             :     {
     432          84 :         unsigned int nGeneration = m_aUIntStack.back();
     433          84 :         m_aUIntStack.pop_back();
     434          84 :         unsigned int nObject = m_aUIntStack.back();
     435          84 :         m_aUIntStack.pop_back();
     436          84 :         insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
     437          84 :     }
     438             : 
     439          75 :     void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     440             :     {
     441          75 :         PDFDict* pDict = new PDFDict();
     442          75 :         pDict->m_nOffset = first - m_aGlobalBegin;
     443             : 
     444          75 :         insertNewValue( pDict, first );
     445             :         // will not come here if insertion fails (exception)
     446          75 :         m_aObjectStack.push_back( pDict );
     447          75 :     }
     448          75 :     void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     449             :     {
     450          75 :         PDFDict* pDict = NULL;
     451          75 :         if( m_aObjectStack.empty() )
     452           0 :             parseError( "dictionary end without begin", first );
     453          75 :         else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
     454           0 :             parseError( "spurious dictionary end", first );
     455             :         else
     456          75 :             m_aObjectStack.pop_back();
     457             : 
     458          75 :         PDFEntry* pOffender = pDict->buildMap();
     459          75 :         if( pOffender )
     460             :         {
     461           0 :             StringEmitContext aCtx;
     462           0 :             aCtx.write( "offending dictionary element: ", 30 );
     463           0 :             pOffender->emit( aCtx );
     464           0 :             m_aErrorString = aCtx.getString();
     465           0 :             parseError( m_aErrorString.getStr(), first );
     466             :         }
     467          75 :     }
     468             : 
     469          45 :     void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     470             :     {
     471          45 :         PDFArray* pArray = new PDFArray();
     472          45 :         pArray->m_nOffset = first - m_aGlobalBegin;
     473             : 
     474          45 :         insertNewValue( pArray, first );
     475             :         // will not come here if insertion fails (exception)
     476          45 :         m_aObjectStack.push_back( pArray );
     477          45 :     }
     478             : 
     479          45 :     void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     480             :     {
     481          45 :         if( m_aObjectStack.empty() )
     482           0 :             parseError( "array end without begin", first );
     483          45 :         else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
     484           0 :             parseError( "spurious array end", first );
     485             :         else
     486          45 :             m_aObjectStack.pop_back();
     487          45 :     }
     488             : 
     489          24 :     void emitStream( iteratorT first, iteratorT last )
     490             :     {
     491          24 :         if( m_aObjectStack.empty() )
     492           0 :             parseError( "stream without object", first );
     493          24 :         PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
     494          24 :         if( pObj && pObj->m_pObject )
     495             :         {
     496          24 :             if( pObj->m_pStream )
     497           0 :                 parseError( "multiple streams in object", first );
     498             : 
     499          24 :             PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
     500          24 :             if( pDict )
     501             :             {
     502          24 :                 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
     503             : 
     504          24 :                 pObj->m_pStream = pStream;
     505          24 :                 pObj->m_aSubElements.push_back( pStream );
     506          24 :             }
     507             :         }
     508             :         else
     509           0 :             parseError( "stream without object", first );
     510          24 :     }
     511             : 
     512           3 :     void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     513             :     {
     514           3 :         if( m_aObjectStack.empty() )
     515           0 :             m_aObjectStack.push_back( new PDFPart() );
     516             : 
     517           3 :         PDFTrailer* pTrailer = new PDFTrailer();
     518           3 :         pTrailer->m_nOffset = first - m_aGlobalBegin;
     519             : 
     520           3 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     521           9 :         if( pContainer &&
     522           6 :             ( dynamic_cast<PDFFile*>(pContainer) ||
     523             :               dynamic_cast<PDFPart*>(pContainer) ) )
     524             :         {
     525           3 :             pContainer->m_aSubElements.push_back( pTrailer );
     526           3 :             m_aObjectStack.push_back( pTrailer );
     527             :         }
     528             :         else
     529           0 :             parseError( "trailer in wrong place", first );
     530           3 :     }
     531             : 
     532           3 :     void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     533             :     {
     534           3 :         if( m_aObjectStack.empty() )
     535           0 :             parseError( "%%EOF without trailer", first );
     536           3 :         else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
     537           0 :             parseError( "spurious %%EOF", first );
     538             :         else
     539           3 :             m_aObjectStack.pop_back();
     540           3 :     }
     541             : };
     542             : 
     543             : #ifdef WIN32
     544             : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
     545             : {
     546             :     PDFGrammar<const char*> aGrammar( pBuffer );
     547             : 
     548             :     try
     549             :     {
     550             : #if OSL_DEBUG_LEVEL > 1
     551             :         boost::spirit::parse_info<const char*> aInfo =
     552             : #endif
     553             :             boost::spirit::parse( pBuffer,
     554             :                                   pBuffer+nLen,
     555             :                                   aGrammar,
     556             :                                   boost::spirit::space_p );
     557             : #if OSL_DEBUG_LEVEL > 1
     558             :         SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << (int)aInfo.length );
     559             : #endif
     560             :     }
     561             :     catch( const parser_error<const char*, const char*>& rError )
     562             :     {
     563             : #if OSL_DEBUG_LEVEL > 1
     564             :         OString aTmp;
     565             :         unsigned int nElem = aGrammar.m_aObjectStack.size();
     566             :         for( unsigned int i = 0; i < nElem; i++ )
     567             :             aTmp += "   " + OString(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
     568             : 
     569             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
     570             : #endif
     571             :     }
     572             : 
     573             :     PDFEntry* pRet = NULL;
     574             :     unsigned int nEntries = aGrammar.m_aObjectStack.size();
     575             :     if( nEntries == 1 )
     576             :     {
     577             :         pRet = aGrammar.m_aObjectStack.back();
     578             :         aGrammar.m_aObjectStack.pop_back();
     579             :     }
     580             : #if OSL_DEBUG_LEVEL > 1
     581             :     else if( nEntries > 1 )
     582             :         SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
     583             : #endif
     584             : 
     585             :     return pRet;
     586             : }
     587             : #endif
     588             : 
     589           3 : PDFEntry* PDFReader::read( const char* pFileName )
     590             : {
     591             : #ifdef WIN32
     592             :     /* #i106583#
     593             :        since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
     594             :        C++ stdlib istream_iterator does not allow "-" apparently
     595             :        using spirit 2.0 doesn't work in our environment with the MSC
     596             : 
     597             :        So for the time being bite the bullet and read the whole file.
     598             :        FIXME: give Spirit 2.x another try when we upgrade boost again.
     599             :     */
     600             :     PDFEntry* pRet = NULL;
     601             :     FILE* fp = fopen( pFileName, "rb" );
     602             :     if( fp )
     603             :     {
     604             :         fseek( fp, 0, SEEK_END );
     605             :         unsigned int nLen = (unsigned int)ftell( fp );
     606             :         fseek( fp, 0, SEEK_SET );
     607             :         char* pBuf = (char*)rtl_allocateMemory( nLen );
     608             :         if( pBuf )
     609             :         {
     610             :             fread( pBuf, 1, nLen, fp );
     611             :             pRet = read( pBuf, nLen );
     612             :             rtl_freeMemory( pBuf );
     613             :         }
     614             :         fclose( fp );
     615             :     }
     616             :     return pRet;
     617             : #else
     618           3 :     file_iterator<> file_start( pFileName );
     619           3 :     if( ! file_start )
     620           0 :         return NULL;
     621           6 :     file_iterator<> file_end = file_start.make_end();
     622           6 :     PDFGrammar< file_iterator<> > aGrammar( file_start );
     623             : 
     624             :     try
     625             :     {
     626             : #if OSL_DEBUG_LEVEL > 1
     627             :         boost::spirit::parse_info< file_iterator<> > aInfo =
     628             : #endif
     629             :             boost::spirit::parse( file_start,
     630             :                                   file_end,
     631             :                                   aGrammar,
     632           3 :                                   boost::spirit::space_p );
     633             : #if OSL_DEBUG_LEVEL > 1
     634             :         SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
     635             : #endif
     636             :     }
     637           0 :     catch( const parser_error< const char*, file_iterator<> >& rError )
     638             :     {
     639             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
     640             : #if OSL_DEBUG_LEVEL > 1
     641             :         OUString aTmp;
     642             :         unsigned int nElem = aGrammar.m_aObjectStack.size();
     643             :         for( unsigned int i = 0; i < nElem; i++ )
     644             :         {
     645             :             aTmp += "   ";
     646             :             aTmp += OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
     647             :                              strlen(typeid( *(aGrammar.m_aObjectStack[i]) ).name()),
     648             :                              RTL_TEXTENCODING_ASCII_US);
     649             :         }
     650             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp);
     651             : #endif
     652             :     }
     653             : 
     654           3 :     PDFEntry* pRet = NULL;
     655           3 :     unsigned int nEntries = aGrammar.m_aObjectStack.size();
     656           3 :     if( nEntries == 1 )
     657             :     {
     658           3 :         pRet = aGrammar.m_aObjectStack.back();
     659           3 :         aGrammar.m_aObjectStack.pop_back();
     660             :     }
     661             : #if OSL_DEBUG_LEVEL > 1
     662             :     else if( nEntries > 1 )
     663             :     {
     664             :         SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
     665             :         for( unsigned int i = 0; i < nEntries; i++ )
     666             :         {
     667             :             SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
     668             :             PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
     669             :             if( pObj )
     670             :                 SAL_WARN("sdext.pdfimport.pdfparse", "   -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
     671             :             else
     672             :                 SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
     673             :         }
     674             :     }
     675             : #endif
     676           6 :     return pRet;
     677             : #endif // WIN32
     678           3 : }
     679             : 
     680             : #if defined _MSC_VER
     681             : #pragma warning(pop)
     682             : #endif
     683             : 
     684             : 
     685             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10