LCOV - code coverage report
Current view: top level - sdext/source/pdfimport/pdfparse - pdfparse.cxx (source / functions) Hit Total Coverage
Test: commit 10e77ab3ff6f4314137acd6e2702a6e5c1ce1fae Lines: 231 291 79.4 %
Date: 2014-11-03 Functions: 30 39 76.9 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
       2             : /*
       3             :  * This file is part of the LibreOffice project.
       4             :  *
       5             :  * This Source Code Form is subject to the terms of the Mozilla Public
       6             :  * License, v. 2.0. If a copy of the MPL was not distributed with this
       7             :  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
       8             :  *
       9             :  * This file incorporates work covered by the following license notice:
      10             :  *
      11             :  *   Licensed to the Apache Software Foundation (ASF) under one or more
      12             :  *   contributor license agreements. See the NOTICE file distributed
      13             :  *   with this work for additional information regarding copyright
      14             :  *   ownership. The ASF licenses this file to you under the Apache
      15             :  *   License, Version 2.0 (the "License"); you may not use this file
      16             :  *   except in compliance with the License. You may obtain a copy of
      17             :  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
      18             :  */
      19             : 
      20             : 
      21             : #if defined _MSC_VER
      22             : #pragma warning(push, 1)
      23             : #endif
      24             : 
      25             : #include "pdfparse.hxx"
      26             : 
      27             : // workaround windows compiler: do not include multi_pass.hpp
      28             : #include <boost/spirit/include/classic_core.hpp>
      29             : #include <boost/spirit/include/classic_utility.hpp>
      30             : #include <boost/spirit/include/classic_error_handling.hpp>
      31             : #include <boost/spirit/include/classic_file_iterator.hpp>
      32             : #include <boost/bind.hpp>
      33             : #include <string.h>
      34             : 
      35             : #include <rtl/strbuf.hxx>
      36             : #include <rtl/alloc.h>
      37             : 
      38             : // disable warnings again because someone along the line has enabled them
      39             : #if defined _MSC_VER
      40             : #pragma warning(push, 1)
      41             : #endif
      42             : 
      43             : using namespace boost::spirit;
      44             : using namespace pdfparse;
      45             : 
      46             : 
      47             : class StringEmitContext : public EmitContext
      48             : {
      49             :     OStringBuffer m_aBuf;
      50             :     public:
      51           0 :     StringEmitContext() : EmitContext(), m_aBuf(256) {}
      52           0 :     virtual ~StringEmitContext() {}
      53           0 :     virtual bool write( const void* pBuf, unsigned int nLen ) throw() SAL_OVERRIDE
      54             :     {
      55           0 :         m_aBuf.append( (const sal_Char*)pBuf, nLen );
      56           0 :         return true;
      57             :     }
      58           0 :     virtual unsigned int getCurPos() throw() SAL_OVERRIDE { return m_aBuf.getLength(); }
      59           0 :     virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() SAL_OVERRIDE
      60           0 :     { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
      61           0 :              write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
      62           0 :     virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() SAL_OVERRIDE
      63             :     {
      64           0 :         if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
      65             :         {
      66           0 :             memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
      67           0 :             return nLen;
      68             :         }
      69           0 :         return 0;
      70             :     }
      71             : 
      72           0 :     OString getString() { return m_aBuf.makeStringAndClear(); }
      73             : };
      74             : 
      75             : template< class iteratorT >
      76             : class PDFGrammar :  public grammar< PDFGrammar<iteratorT> >
      77             : {
      78             : public:
      79             : 
      80           6 :     PDFGrammar( const iteratorT& first )
      81           6 :     : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
      82           6 :     ~PDFGrammar()
      83             :     {
      84           6 :         if( !m_aObjectStack.empty() )
      85           0 :             delete m_aObjectStack.front();
      86           6 :     }
      87             : 
      88             :     double m_fDouble;
      89             :     std::vector< unsigned int > m_aUIntStack;
      90             :     std::vector< PDFEntry* >    m_aObjectStack;
      91             :     OString                m_aErrorString;
      92             :     iteratorT                   m_aGlobalBegin;
      93             : 
      94             : public:
      95             :     struct pdf_string_parser
      96             :     {
      97             :         typedef nil_t result_t;
      98             :         template <typename ScannerT>
      99             :         std::ptrdiff_t
     100          12 :         operator()(ScannerT const& scan, result_t&) const
     101             :         {
     102          12 :             std::ptrdiff_t len = 0;
     103             : 
     104          12 :             int nBraceLevel = 0;
     105         348 :             while( ! scan.at_end() )
     106             :             {
     107         336 :                 char c = *scan;
     108         336 :                 if( c == ')' )
     109             :                 {
     110          12 :                     nBraceLevel--;
     111          12 :                     if( nBraceLevel < 0 )
     112          12 :                         break;
     113             :                 }
     114         324 :                 else if( c == '(' )
     115           0 :                     nBraceLevel++;
     116         324 :                 else if( c == '\\' ) // ignore escaped braces
     117             :                 {
     118           0 :                     ++len;
     119           0 :                     ++scan;
     120           0 :                     if( scan.at_end() )
     121           0 :                         break;
     122             :                 }
     123         324 :                 ++len;
     124         324 :                 ++scan;
     125             :             }
     126          12 :             return scan.at_end() ? -1 : len;
     127             :         }
     128             :     };
     129             : 
     130             :     template< typename ScannerT >
     131           6 :     struct definition
     132             :     {
                               // Rule set of the PDF grammar. The constructor wires every rule to
                               // a member function of the enclosing PDFGrammar via boost::bind;
                               // start() names pdfrule as the entry rule.
     133           6 :         definition( const PDFGrammar<iteratorT>& rSelf )
     134           6 :         {
                                   // the grammar arrives as a const reference, but the actions bound
                                   // below are non-const members that modify its stacks
     135           6 :             PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
     136             : 
     137             :             // workaround workshop compiler: comment_p doesn't work
     138             :             // comment     = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
     139           6 :             comment     = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
     140             : 
     141           6 :             boolean     = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
     142             : 
     143             :             // workaround workshop compiler: confix_p doesn't work
     144             :             //stream      = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
     145           6 :             stream      = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
     146             : 
                                   // a name is '/' followed by any run of characters that are neither
                                   // whitespace, one of the listed delimiters, nor NUL; only the part
                                   // after the '/' is passed to pushName
     147           6 :             name        = lexeme_d[
     148             :                             ch_p('/')
     149             :                             >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
     150           6 :                                [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
     151             : 
     152             :             // workaround workshop compiler: confix_p doesn't work
     153             :             //stringtype  = ( confix_p("(",*anychar_p, ")") |
     154             :             //                confix_p("<",*xdigit_p,  ">") )
     155             :             //              [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
     156             : 
                                   // either a parenthesised string (body handled by pdf_string_parser)
                                   // or a hex string of the form <...>; delimiters are part of the match
     157          18 :             stringtype  = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
     158          12 :                             ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
     159          24 :                           [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
     160             : 
     161           6 :             null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
     162             : 
                                   // indirect reference "<object> <generation> R"; both numbers are
                                   // pushed on m_aUIntStack and popped again by pushObjectRef
     163             :             #ifdef USE_ASSIGN_ACTOR
     164             :             objectref   = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
     165             :                             >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     166             :                             >> ch_p('R')
     167             :                             >> eps_p
     168             :                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
     169             :             #else
     170          18 :             objectref   = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     171          24 :                             >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     172          24 :                             >> ch_p('R')
     173          12 :                             >> eps_p
     174          24 :                           )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
     175             :             #endif
     176             : 
                                   // objectref is tried before the plain number alternative; the
                                   // parsed number is stored in m_fDouble and picked up by pushDouble
     177             :             #ifdef USE_ASSIGN_ACTOR
     178             :             simple_type = objectref | name |
     179             :                           ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
     180             :                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
     181             :                           | stringtype | boolean | null_object;
     182             :             #else
     183          18 :             simple_type = objectref | name |
     184          12 :                           ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
     185          24 :                           [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
     186          18 :                           | stringtype | boolean | null_object;
     187             :             #endif
     188             : 
     189           6 :             dict_begin  = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
     190           6 :             dict_end    = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
     191             : 
     192           6 :             array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
     193           6 :             array_end   = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
     194             : 
                                   // object header "<object> <generation> obj"; the two numbers are
                                   // popped from m_aUIntStack by beginObject
     195             :             #ifdef USE_ASSIGN_ACTOR
     196             :             object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
     197             :                           >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     198             :                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
     199             :             #else
     200          18 :             object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     201          24 :                           >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     202          24 :                           >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
     203             :             #endif
     204           6 :             object_end  = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
     205             : 
                                   // cross reference table: "xref", two numbers, then one or more
                                   // entries of 10 digits, blank, 5 digits, blank, 'n' or 'f' and two
                                   // whitespace characters; no semantic action is attached
     206          18 :             xref        = str_p( "xref" ) >> uint_p >> uint_p
     207          24 :                           >> lexeme_d[
     208          12 :                                 +( repeat_p(10)[digit_p]
     209          12 :                                    >> blank_p
     210          24 :                                    >> repeat_p(5)[digit_p]
     211          12 :                                    >> blank_p
     212          24 :                                    >> ( ch_p('n') | ch_p('f') )
     213          24 :                                    >> repeat_p(2)[space_p]
     214             :                                  ) ];
     215             : 
                                   // dictionaries and arrays are matched as a flat sequence of
                                   // begin/end tokens and values; nesting is tracked by the begin*/end*
                                   // actions through m_aObjectStack rather than by recursive rules
     216          18 :             dict_element= dict_begin | comment | simple_type
     217          18 :                           | array_begin | array_end | dict_end;
     218             : 
                                   // an object is its header, any number of elements, an optional
                                   // stream and "endobj"
     219           6 :             object      = object_begin
     220          12 :                           >> *dict_element
     221          24 :                           >> !stream
     222          18 :                           >> object_end;
     223             : 
     224          18 :             trailer     = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
     225          24 :                           >> *dict_element
     226          24 :                           >> str_p("startxref")
     227          12 :                           >> uint_p
     228          24 :                           >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
     229             : 
                                   // entry rule: an optional "%PDF-<major>.<minor>" header line
                                   // followed by any number of comments, objects and xref/trailer pairs
     230             :             #ifdef USE_ASSIGN_ACTOR
     231             :             pdfrule     = ! (lexeme_d[
     232             :                                 str_p( "%PDF-" )
     233             :                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     234             :                                 >> ch_p('.')
     235             :                                 >> uint_p[push_back_a(pSelf->m_aUIntStack)]
     236             :                                 >> *((~ch_p('\r') & ~ch_p('\n')))
     237             :                                 >> eol_p
     238             :                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
     239             :                           >> *( comment | object | ( xref >> trailer ) );
     240             :             #else
     241          30 :             pdfrule     = ! (lexeme_d[
     242             :                                 str_p( "%PDF-" )
     243          12 :                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     244          24 :                                 >> ch_p('.')
     245          24 :                                 >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
     246          24 :                                 >> *((~ch_p('\r') & ~ch_p('\n')))
     247          12 :                                 >> eol_p
     248          24 :                              ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
     249          24 :                           >> *( comment | object | ( xref >> trailer ) );
     250             :             #endif
     251           6 :         }
                               // NOTE(review): `array` and `value` are declared here but never
                               // assigned or referenced by the constructor above.
     252             :         rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
     253             :                          objectref, array, value, dict_element, dict_begin, dict_end,
     254             :                          array_begin, array_end, object, object_begin, object_end,
     255             :                          xref, trailer, pdfrule;
     256             : 
                               // entry point used by the Spirit grammar machinery
     257           6 :         const rule< ScannerT >& start() const { return pdfrule; }
     258             :     };
     259             : 
     260             :     #ifndef USE_ASSIGN_ACTOR
     261        1422 :     void push_back_action_uint( unsigned int i )
     262             :     {
     263        1422 :         m_aUIntStack.push_back( i );
     264        1422 :     }
     265         582 :     void assign_action_double( double d )
     266             :     {
     267         582 :         m_fDouble = d;
     268         582 :     }
     269             :     #endif
     270             : 
     271           0 :     void parseError( const char* pMessage, iteratorT pLocation )
     272             :     {
     273           0 :         throw_( pLocation, pMessage );
     274           0 :     }
     275             : 
     276        1014 :     OString iteratorToString( iteratorT first, iteratorT last ) const
     277             :     {
     278        1014 :         OStringBuffer aStr( 32 );
     279       10428 :         while( first != last )
     280             :         {
     281        8400 :             aStr.append( *first );
     282        8400 :             ++first;
     283             :         }
     284        1014 :         return aStr.makeStringAndClear();
     285             :     }
     286             : 
     287           6 :     void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
     288             :     {
     289           6 :         if( m_aObjectStack.empty() )
     290             :         {
     291           6 :             PDFFile* pFile = new PDFFile();
     292           6 :             pFile->m_nMinor = m_aUIntStack.back();
     293           6 :             m_aUIntStack.pop_back();
     294           6 :             pFile->m_nMajor = m_aUIntStack.back();
     295           6 :             m_aUIntStack.pop_back();
     296           6 :             m_aObjectStack.push_back( pFile );
     297             :         }
     298             :         else
     299           0 :             parseError( "found file header in unusual place", pBegin );
     300           6 :     }
     301             : 
     302           6 :     void pushComment( iteratorT first, iteratorT last )
     303             :     {
     304             :         // add a comment to the current stack element
     305             :         PDFComment* pComment =
     306           6 :             new PDFComment(iteratorToString(first,last));
     307           6 :         if( m_aObjectStack.empty() )
     308           0 :             m_aObjectStack.push_back( new PDFPart() );
     309           6 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     310           6 :         if( pContainer == NULL )
     311           0 :             parseError( "comment without container", first );
     312           6 :         pContainer->m_aSubElements.push_back( pComment );
     313           6 :     }
     314             : 
     315        2016 :     void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
     316             :     {
     317        2016 :         PDFContainer* pContainer = NULL;
     318        2016 :         const char* pMsg = NULL;
     319        4032 :         if( ! m_aObjectStack.empty() &&
     320        2016 :             (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
     321             :         {
     322        2592 :             if( dynamic_cast<PDFDict*>(pContainer) == NULL      &&
     323         576 :                 dynamic_cast<PDFArray*>(pContainer) == NULL )
     324             :             {
     325         156 :                 PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
     326         156 :                 if( pObj )
     327             :                 {
     328         150 :                     if( pObj->m_pObject == NULL )
     329         150 :                         pObj->m_pObject = pNewValue;
     330             :                     else
     331             :                     {
     332           0 :                         pMsg = "second value for object";
     333           0 :                         pContainer = NULL;
     334             :                     }
     335             :                 }
     336           6 :                 else if( dynamic_cast<PDFDict*>(pNewValue) )
     337             :                 {
     338           6 :                     PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
     339           6 :                     if( pTrailer )
     340             :                     {
     341           6 :                         if( pTrailer->m_pDict == NULL )
     342           6 :                             pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
     343             :                         else
     344           0 :                             pContainer = NULL;
     345             :                     }
     346             :                     else
     347           0 :                         pContainer = NULL;
     348             :                 }
     349             :                 else
     350           0 :                     pContainer = NULL;
     351             :             }
     352             :         }
     353        2016 :         if( pContainer )
     354        2016 :             pContainer->m_aSubElements.push_back( pNewValue );
     355             :         else
     356             :         {
     357           0 :             if( ! pMsg )
     358             :             {
     359           0 :                 if( dynamic_cast<PDFContainer*>(pNewValue) )
     360           0 :                     pMsg = "array without container";
     361             :                 else
     362           0 :                     pMsg = "value without container";
     363             :             }
     364           0 :             delete pNewValue;
     365           0 :             parseError( pMsg, pPos );
     366             :         }
     367        2016 :     }
     368             : 
     369         972 :     void pushName( iteratorT first, iteratorT last )
     370             :     {
     371         972 :         insertNewValue( new PDFName(iteratorToString(first,last)), first );
     372         972 :     }
     373             : 
     374         582 :     void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
     375             :     {
     376         582 :         insertNewValue( new PDFNumber(m_fDouble), first );
     377         582 :     }
     378             : 
     379          36 :     void pushString( iteratorT first, iteratorT last )
     380             :     {
     381          36 :         insertNewValue( new PDFString(iteratorToString(first,last)), first );
     382          36 :     }
     383             : 
     384           6 :     void pushBool( iteratorT first, iteratorT last )
     385             :     {
     386           6 :         insertNewValue( new PDFBool( (last-first == 4) ), first );
     387           6 :     }
     388             : 
     389          12 :     void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     390             :     {
     391          12 :         insertNewValue( new PDFNull(), first );
     392          12 :     }
     393             : 
     394             : 
     395         150 :     void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
     396             :     {
     397         150 :         if( m_aObjectStack.empty() )
     398           0 :             m_aObjectStack.push_back( new PDFPart() );
     399             : 
     400         150 :         unsigned int nGeneration = m_aUIntStack.back();
     401         150 :         m_aUIntStack.pop_back();
     402         150 :         unsigned int nObject = m_aUIntStack.back();
     403         150 :         m_aUIntStack.pop_back();
     404             : 
     405         150 :         PDFObject* pObj = new PDFObject( nObject, nGeneration );
     406         150 :         pObj->m_nOffset = first - m_aGlobalBegin;
     407             : 
     408         150 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     409         450 :         if( pContainer &&
     410         300 :             ( dynamic_cast<PDFFile*>(pContainer) ||
     411             :               dynamic_cast<PDFPart*>(pContainer) ) )
     412             :         {
     413         150 :             pContainer->m_aSubElements.push_back( pObj );
     414         150 :             m_aObjectStack.push_back( pObj );
     415             :         }
     416             :         else
     417           0 :             parseError( "object in wrong place", first );
     418         150 :     }
     419             : 
     420         150 :     void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     421             :     {
     422         150 :         if( m_aObjectStack.empty() )
     423           0 :             parseError( "endobj without obj", first );
     424         150 :         else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
     425           0 :             parseError( "spurious endobj", first );
     426             :         else
     427         150 :             m_aObjectStack.pop_back();
     428         150 :     }
     429             : 
     430         168 :     void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     431             :     {
     432         168 :         unsigned int nGeneration = m_aUIntStack.back();
     433         168 :         m_aUIntStack.pop_back();
     434         168 :         unsigned int nObject = m_aUIntStack.back();
     435         168 :         m_aUIntStack.pop_back();
     436         168 :         insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
     437         168 :     }
     438             : 
     439         150 :     void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     440             :     {
     441         150 :         PDFDict* pDict = new PDFDict();
     442         150 :         pDict->m_nOffset = first - m_aGlobalBegin;
     443             : 
     444         150 :         insertNewValue( pDict, first );
     445             :         // will not come here if insertion fails (exception)
     446         150 :         m_aObjectStack.push_back( pDict );
     447         150 :     }
     448         150 :     void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     449             :     {
     450         150 :         PDFDict* pDict = NULL;
     451         150 :         if( m_aObjectStack.empty() )
     452           0 :             parseError( "dictionary end without begin", first );
     453         150 :         else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
     454           0 :             parseError( "spurious dictionary end", first );
     455             :         else
     456         150 :             m_aObjectStack.pop_back();
     457             : 
     458         150 :         PDFEntry* pOffender = pDict->buildMap();
     459         150 :         if( pOffender )
     460             :         {
     461           0 :             StringEmitContext aCtx;
     462           0 :             aCtx.write( "offending dictionary element: ", 30 );
     463           0 :             pOffender->emit( aCtx );
     464           0 :             m_aErrorString = aCtx.getString();
     465           0 :             parseError( m_aErrorString.getStr(), first );
     466             :         }
     467         150 :     }
     468             : 
     469          90 :     void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     470             :     {
     471          90 :         PDFArray* pArray = new PDFArray();
     472          90 :         pArray->m_nOffset = first - m_aGlobalBegin;
     473             : 
     474          90 :         insertNewValue( pArray, first );
     475             :         // will not come here if insertion fails (exception)
     476          90 :         m_aObjectStack.push_back( pArray );
     477          90 :     }
     478             : 
     479          90 :     void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     480             :     {
     481          90 :         if( m_aObjectStack.empty() )
     482           0 :             parseError( "array end without begin", first );
     483          90 :         else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
     484           0 :             parseError( "spurious array end", first );
     485             :         else
     486          90 :             m_aObjectStack.pop_back();
     487          90 :     }
     488             : 
     489          48 :     void emitStream( iteratorT first, iteratorT last )
     490             :     {
     491          48 :         if( m_aObjectStack.empty() )
     492           0 :             parseError( "stream without object", first );
     493          48 :         PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
     494          48 :         if( pObj && pObj->m_pObject )
     495             :         {
     496          48 :             if( pObj->m_pStream )
     497           0 :                 parseError( "multiple streams in object", first );
     498             : 
     499          48 :             PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
     500          48 :             if( pDict )
     501             :             {
     502          48 :                 PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
     503             : 
     504          48 :                 pObj->m_pStream = pStream;
     505          48 :                 pObj->m_aSubElements.push_back( pStream );
     506          48 :             }
     507             :         }
     508             :         else
     509           0 :             parseError( "stream without object", first );
     510          48 :     }
     511             : 
     512           6 :     void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     513             :     {
     514           6 :         if( m_aObjectStack.empty() )
     515           0 :             m_aObjectStack.push_back( new PDFPart() );
     516             : 
     517           6 :         PDFTrailer* pTrailer = new PDFTrailer();
     518           6 :         pTrailer->m_nOffset = first - m_aGlobalBegin;
     519             : 
     520           6 :         PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
     521          18 :         if( pContainer &&
     522          12 :             ( dynamic_cast<PDFFile*>(pContainer) ||
     523             :               dynamic_cast<PDFPart*>(pContainer) ) )
     524             :         {
     525           6 :             pContainer->m_aSubElements.push_back( pTrailer );
     526           6 :             m_aObjectStack.push_back( pTrailer );
     527             :         }
     528             :         else
     529           0 :             parseError( "trailer in wrong place", first );
     530           6 :     }
     531             : 
     532           6 :     void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
     533             :     {
     534           6 :         if( m_aObjectStack.empty() )
     535           0 :             parseError( "%%EOF without trailer", first );
     536           6 :         else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
     537           0 :             parseError( "spurious %%EOF", first );
     538             :         else
     539           6 :             m_aObjectStack.pop_back();
     540           6 :     }
     541             : };
     542             : 
     543             : #ifdef WIN32
     544             : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
     545             : {
     546             :     PDFGrammar<const char*> aGrammar( pBuffer );
     547             : 
     548             :     try
     549             :     {
     550             : #if OSL_DEBUG_LEVEL > 1
     551             :         boost::spirit::parse_info<const char*> aInfo =
     552             : #endif
     553             :             boost::spirit::parse( pBuffer,
     554             :                                   pBuffer+nLen,
     555             :                                   aGrammar,
     556             :                                   boost::spirit::space_p );
     557             : #if OSL_DEBUG_LEVEL > 1
     558             :         SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << (int)aInfo.length );
     559             : #endif
     560             :     }
     561             :     catch( const parser_error<const char*, const char*>& rError )
     562             :     {
     563             : #if OSL_DEBUG_LEVEL > 1
     564             :         OString aTmp;
     565             :         unsigned int nElem = aGrammar.m_aObjectStack.size();
     566             :         for( unsigned int i = 0; i < nElem; i++ )
     567             :             aTmp += "   " + OString(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
     568             : 
     569             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
     570             : #endif
     571             :     }
     572             : 
     573             :     PDFEntry* pRet = NULL;
     574             :     unsigned int nEntries = aGrammar.m_aObjectStack.size();
     575             :     if( nEntries == 1 )
     576             :     {
     577             :         pRet = aGrammar.m_aObjectStack.back();
     578             :         aGrammar.m_aObjectStack.pop_back();
     579             :     }
     580             : #if OSL_DEBUG_LEVEL > 1
     581             :     else if( nEntries > 1 )
     582             :         SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
     583             : #endif
     584             : 
     585             :     return pRet;
     586             : }
     587             : #endif
     588             : 
     589           6 : PDFEntry* PDFReader::read( const char* pFileName )
     590             : {
     591             : #ifdef WIN32
     592             :     /* #i106583#
     593             :        since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
     594             :        C++ stdlib istream_iterator does not allow "-" apparently
     595             :        using spirit 2.0 doesn't work in our environment with the MSC
     596             : 
     597             :        So for the time being bite the bullet and read the whole file.
     598             :        FIXME: give Spirit 2.x another try when we upgrade boost again.
     599             :     */
     600             :     PDFEntry* pRet = NULL;
     601             :     FILE* fp = fopen( pFileName, "rb" );
     602             :     if( fp )
     603             :     {
     604             :         fseek( fp, 0, SEEK_END );
     605             :         unsigned int nLen = (unsigned int)ftell( fp );
     606             :         fseek( fp, 0, SEEK_SET );
     607             :         char* pBuf = (char*)rtl_allocateMemory( nLen );
     608             :         if( pBuf )
     609             :         {
     610             :             fread( pBuf, 1, nLen, fp );
     611             :             pRet = read( pBuf, nLen );
     612             :             rtl_freeMemory( pBuf );
     613             :         }
     614             :         fclose( fp );
     615             :     }
     616             :     return pRet;
     617             : #else
     618           6 :     file_iterator<> file_start( pFileName );
     619           6 :     if( ! file_start )
     620           0 :         return NULL;
     621          12 :     file_iterator<> file_end = file_start.make_end();
     622          12 :     PDFGrammar< file_iterator<> > aGrammar( file_start );
     623             : 
     624             :     try
     625             :     {
     626             : #if OSL_DEBUG_LEVEL > 1
     627             :         boost::spirit::parse_info< file_iterator<> > aInfo =
     628             : #endif
     629             :             boost::spirit::parse( file_start,
     630             :                                   file_end,
     631             :                                   aGrammar,
     632           6 :                                   boost::spirit::space_p );
     633             : #if OSL_DEBUG_LEVEL > 1
     634             :         SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
     635             : #endif
     636             :     }
     637           0 :     catch( const parser_error< const char*, file_iterator<> >& rError )
     638             :     {
     639             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
     640             : #if OSL_DEBUG_LEVEL > 1
     641             :         OUString aTmp;
     642             :         unsigned int nElem = aGrammar.m_aObjectStack.size();
     643             :         for( unsigned int i = 0; i < nElem; i++ )
     644             :         {
     645             :             aTmp += "   ";
     646             :             aTmp += OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
     647             :                              strlen(typeid( *(aGrammar.m_aObjectStack[i]) ).name()),
     648             :                              RTL_TEXTENCODING_ASCII_US);
     649             :         }
     650             :         SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp);
     651             : #endif
     652             :     }
     653             : 
     654           6 :     PDFEntry* pRet = NULL;
     655           6 :     unsigned int nEntries = aGrammar.m_aObjectStack.size();
     656           6 :     if( nEntries == 1 )
     657             :     {
     658           6 :         pRet = aGrammar.m_aObjectStack.back();
     659           6 :         aGrammar.m_aObjectStack.pop_back();
     660             :     }
     661             : #if OSL_DEBUG_LEVEL > 1
     662             :     else if( nEntries > 1 )
     663             :     {
     664             :         SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
     665             :         for( unsigned int i = 0; i < nEntries; i++ )
     666             :         {
     667             :             SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
     668             :             PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
     669             :             if( pObj )
     670             :                 SAL_WARN("sdext.pdfimport.pdfparse", "   -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
     671             :             else
     672             :                 SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
     673             :         }
     674             :     }
     675             : #endif
     676          12 :     return pRet;
     677             : #endif // WIN32
     678           6 : }
     679             : 
     680             : #if defined _MSC_VER
     681             : #pragma warning(pop)
     682             : #endif
     683             : 
     684             : 
     685             : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */

Generated by: LCOV version 1.10