Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #if defined __SUNPRO_CC
22 : #pragma disable_warn
23 : #elif defined _MSC_VER
24 : #pragma warning(push, 1)
25 : #endif
26 :
27 : #include "pdfparse.hxx"
28 :
29 : // workaround windows compiler: do not include multi_pass.hpp
30 : #include <boost/spirit/include/classic_core.hpp>
31 : #include <boost/spirit/include/classic_utility.hpp>
32 : #include <boost/spirit/include/classic_error_handling.hpp>
33 : #include <boost/spirit/include/classic_file_iterator.hpp>
34 : #include <boost/bind.hpp>
35 : #include <string.h>
36 :
37 : #include <rtl/strbuf.hxx>
38 : #include <rtl/alloc.h>
39 :
40 : // disable warnings again because someone along the line has enabled them
41 : #if defined __SUNPRO_CC
42 : #pragma disable_warn
43 : #elif defined _MSC_VER
44 : #pragma warning(push, 1)
45 : #endif
46 :
47 : using namespace boost::spirit;
48 : using namespace pdfparse;
49 :
50 : using ::rtl::OString;
51 : using ::rtl::OStringBuffer;
52 :
53 : class StringEmitContext : public EmitContext
54 : {
55 : OStringBuffer m_aBuf;
56 : public:
57 0 : StringEmitContext() : EmitContext(), m_aBuf(256) {}
58 0 : virtual ~StringEmitContext() {}
59 0 : virtual bool write( const void* pBuf, unsigned int nLen ) throw()
60 : {
61 0 : m_aBuf.append( (const sal_Char*)pBuf, nLen );
62 0 : return true;
63 : }
64 0 : virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
65 0 : virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
66 0 : { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
67 0 : write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
68 0 : virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
69 : {
70 0 : if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
71 : {
72 0 : memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
73 0 : return nLen;
74 : }
75 0 : return 0;
76 : }
77 :
78 0 : OString getString() { return m_aBuf.makeStringAndClear(); }
79 : };
80 :
81 : template< class iteratorT >
82 : class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
83 : {
84 : public:
85 :
86 3 : PDFGrammar( const iteratorT& first )
87 3 : : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
88 3 : ~PDFGrammar()
89 : {
90 3 : if( !m_aObjectStack.empty() )
91 0 : delete m_aObjectStack.front();
92 3 : }
93 :
94 : double m_fDouble;
95 : std::vector< unsigned int > m_aUIntStack;
96 : std::vector< PDFEntry* > m_aObjectStack;
97 : rtl::OString m_aErrorString;
98 : iteratorT m_aGlobalBegin;
99 :
100 : public:
101 : struct pdf_string_parser
102 : {
103 : typedef nil_t result_t;
104 : template <typename ScannerT>
105 : std::ptrdiff_t
106 6 : operator()(ScannerT const& scan, result_t&) const
107 : {
108 6 : std::ptrdiff_t len = 0;
109 :
110 6 : int nBraceLevel = 0;
111 174 : while( ! scan.at_end() )
112 : {
113 168 : char c = *scan;
114 168 : if( c == ')' )
115 : {
116 6 : nBraceLevel--;
117 6 : if( nBraceLevel < 0 )
118 6 : break;
119 : }
120 162 : else if( c == '(' )
121 0 : nBraceLevel++;
122 162 : else if( c == '\\' ) // ignore escaped braces
123 : {
124 0 : ++len;
125 0 : ++scan;
126 0 : if( scan.at_end() )
127 0 : break;
128 : }
129 162 : ++len;
130 162 : ++scan;
131 : }
132 6 : return scan.at_end() ? -1 : len;
133 : }
134 : };
135 :
136 : template< typename ScannerT >
137 3 : struct definition
138 : {
139 3 : definition( const PDFGrammar<iteratorT>& rSelf )
140 3 : {
141 3 : PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
142 :
143 : // workaround workshop compiler: comment_p doesn't work
144 : // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
145 3 : comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
146 :
147 3 : boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
148 :
149 : // workaround workshop compiler: confix_p doesn't work
150 : //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
151 3 : stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
152 :
153 3 : name = lexeme_d[
154 : ch_p('/')
155 : >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
156 : [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
157 :
158 : // workaround workshop compiler: confix_p doesn't work
159 : //stringtype = ( confix_p("(",*anychar_p, ")") |
160 : // confix_p("<",*xdigit_p, ">") )
161 : // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
162 :
163 3 : stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
164 : ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
165 : [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
166 :
167 3 : null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
168 :
169 : #ifdef USE_ASSIGN_ACTOR
170 : objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
171 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
172 : >> ch_p('R')
173 : >> eps_p
174 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
175 : #else
176 3 : objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
177 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
178 : >> ch_p('R')
179 : >> eps_p
180 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
181 : #endif
182 :
183 : #ifdef USE_ASSIGN_ACTOR
184 : simple_type = objectref | name |
185 : ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
186 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
187 : | stringtype | boolean | null_object;
188 : #else
189 3 : simple_type = objectref | name |
190 : ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
191 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
192 : | stringtype | boolean | null_object;
193 : #endif
194 :
195 3 : dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
196 3 : dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
197 :
198 3 : array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
199 3 : array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
200 :
201 : #ifdef USE_ASSIGN_ACTOR
202 : object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
203 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
204 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
205 : #else
206 3 : object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
207 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
208 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
209 : #endif
210 3 : object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
211 :
212 3 : xref = str_p( "xref" ) >> uint_p >> uint_p
213 : >> lexeme_d[
214 : +( repeat_p(10)[digit_p]
215 : >> blank_p
216 : >> repeat_p(5)[digit_p]
217 : >> blank_p
218 : >> ( ch_p('n') | ch_p('f') )
219 : >> repeat_p(2)[space_p]
220 : ) ];
221 :
222 3 : dict_element= dict_begin | comment | simple_type
223 : | array_begin | array_end | dict_end;
224 :
225 3 : object = object_begin
226 : >> *dict_element
227 : >> !stream
228 : >> object_end;
229 :
230 3 : trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
231 : >> *dict_element
232 : >> str_p("startxref")
233 : >> uint_p
234 : >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
235 :
236 : #ifdef USE_ASSIGN_ACTOR
237 : pdfrule = ! (lexeme_d[
238 : str_p( "%PDF-" )
239 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
240 : >> ch_p('.')
241 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
242 : >> *((~ch_p('\r') & ~ch_p('\n')))
243 : >> eol_p
244 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
245 : >> *( comment | object | ( xref >> trailer ) );
246 : #else
247 3 : pdfrule = ! (lexeme_d[
248 : str_p( "%PDF-" )
249 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
250 : >> ch_p('.')
251 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
252 : >> *((~ch_p('\r') & ~ch_p('\n')))
253 : >> eol_p
254 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
255 : >> *( comment | object | ( xref >> trailer ) );
256 : #endif
257 3 : }
258 : rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
259 : objectref, array, value, dict_element, dict_begin, dict_end,
260 : array_begin, array_end, object, object_begin, object_end,
261 : xref, trailer, pdfrule;
262 :
263 3 : const rule< ScannerT >& start() const { return pdfrule; }
264 : };
265 :
266 : #ifndef USE_ASSIGN_ACTOR
267 711 : void push_back_action_uint( unsigned int i )
268 : {
269 711 : m_aUIntStack.push_back( i );
270 711 : }
271 291 : void assign_action_double( double d )
272 : {
273 291 : m_fDouble = d;
274 291 : }
275 : #endif
276 :
277 0 : void parseError( const char* pMessage, iteratorT pLocation )
278 : {
279 0 : throw_( pLocation, pMessage );
280 0 : }
281 :
282 507 : rtl::OString iteratorToString( iteratorT first, iteratorT last ) const
283 : {
284 507 : rtl::OStringBuffer aStr( 32 );
285 5214 : while( first != last )
286 : {
287 4200 : aStr.append( *first );
288 4200 : ++first;
289 : }
290 507 : return aStr.makeStringAndClear();
291 : }
292 :
293 3 : void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
294 : {
295 3 : if( m_aObjectStack.empty() )
296 : {
297 3 : PDFFile* pFile = new PDFFile();
298 3 : pFile->m_nMinor = m_aUIntStack.back();
299 3 : m_aUIntStack.pop_back();
300 3 : pFile->m_nMajor = m_aUIntStack.back();
301 3 : m_aUIntStack.pop_back();
302 3 : m_aObjectStack.push_back( pFile );
303 : }
304 : else
305 0 : parseError( "found file header in unusual place", pBegin );
306 3 : }
307 :
308 3 : void pushComment( iteratorT first, iteratorT last )
309 : {
310 : // add a comment to the current stack element
311 : PDFComment* pComment =
312 3 : new PDFComment(iteratorToString(first,last));
313 3 : if( m_aObjectStack.empty() )
314 0 : m_aObjectStack.push_back( new PDFPart() );
315 3 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
316 3 : if( pContainer == NULL )
317 0 : parseError( "comment without container", first );
318 3 : pContainer->m_aSubElements.push_back( pComment );
319 3 : }
320 :
321 1008 : void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
322 : {
323 1008 : PDFContainer* pContainer = NULL;
324 1008 : const char* pMsg = NULL;
325 1008 : if( ! m_aObjectStack.empty() &&
326 : (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
327 : {
328 1008 : if( dynamic_cast<PDFDict*>(pContainer) == NULL &&
329 : dynamic_cast<PDFArray*>(pContainer) == NULL )
330 : {
331 78 : PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
332 78 : if( pObj )
333 : {
334 75 : if( pObj->m_pObject == NULL )
335 75 : pObj->m_pObject = pNewValue;
336 : else
337 : {
338 0 : pMsg = "second value for object";
339 0 : pContainer = NULL;
340 : }
341 : }
342 3 : else if( dynamic_cast<PDFDict*>(pNewValue) )
343 : {
344 3 : PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
345 3 : if( pTrailer )
346 : {
347 3 : if( pTrailer->m_pDict == NULL )
348 3 : pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
349 : else
350 0 : pContainer = NULL;
351 : }
352 : else
353 0 : pContainer = NULL;
354 : }
355 : else
356 0 : pContainer = NULL;
357 : }
358 : }
359 1008 : if( pContainer )
360 1008 : pContainer->m_aSubElements.push_back( pNewValue );
361 : else
362 : {
363 0 : if( ! pMsg )
364 : {
365 0 : if( dynamic_cast<PDFContainer*>(pNewValue) )
366 0 : pMsg = "array without container";
367 : else
368 0 : pMsg = "value without container";
369 : }
370 0 : delete pNewValue;
371 0 : parseError( pMsg, pPos );
372 : }
373 1008 : }
374 :
375 486 : void pushName( iteratorT first, iteratorT last )
376 : {
377 486 : insertNewValue( new PDFName(iteratorToString(first,last)), first );
378 486 : }
379 :
380 291 : void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
381 : {
382 291 : insertNewValue( new PDFNumber(m_fDouble), first );
383 291 : }
384 :
385 18 : void pushString( iteratorT first, iteratorT last )
386 : {
387 18 : insertNewValue( new PDFString(iteratorToString(first,last)), first );
388 18 : }
389 :
390 3 : void pushBool( iteratorT first, iteratorT last )
391 : {
392 3 : insertNewValue( new PDFBool( (last-first == 4) ), first );
393 3 : }
394 :
395 6 : void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
396 : {
397 6 : insertNewValue( new PDFNull(), first );
398 6 : }
399 :
400 :
401 75 : void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
402 : {
403 75 : if( m_aObjectStack.empty() )
404 0 : m_aObjectStack.push_back( new PDFPart() );
405 :
406 75 : unsigned int nGeneration = m_aUIntStack.back();
407 75 : m_aUIntStack.pop_back();
408 75 : unsigned int nObject = m_aUIntStack.back();
409 75 : m_aUIntStack.pop_back();
410 :
411 75 : PDFObject* pObj = new PDFObject( nObject, nGeneration );
412 75 : pObj->m_nOffset = first - m_aGlobalBegin;
413 :
414 75 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
415 75 : if( pContainer &&
416 : ( dynamic_cast<PDFFile*>(pContainer) ||
417 : dynamic_cast<PDFPart*>(pContainer) ) )
418 : {
419 75 : pContainer->m_aSubElements.push_back( pObj );
420 75 : m_aObjectStack.push_back( pObj );
421 : }
422 : else
423 0 : parseError( "object in wrong place", first );
424 75 : }
425 :
426 75 : void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
427 : {
428 75 : if( m_aObjectStack.empty() )
429 0 : parseError( "endobj without obj", first );
430 75 : else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
431 0 : parseError( "spurious endobj", first );
432 : else
433 75 : m_aObjectStack.pop_back();
434 75 : }
435 :
436 84 : void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
437 : {
438 84 : unsigned int nGeneration = m_aUIntStack.back();
439 84 : m_aUIntStack.pop_back();
440 84 : unsigned int nObject = m_aUIntStack.back();
441 84 : m_aUIntStack.pop_back();
442 84 : insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
443 84 : }
444 :
445 75 : void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
446 : {
447 75 : PDFDict* pDict = new PDFDict();
448 75 : pDict->m_nOffset = first - m_aGlobalBegin;
449 :
450 75 : insertNewValue( pDict, first );
451 : // will not come here if insertion fails (exception)
452 75 : m_aObjectStack.push_back( pDict );
453 75 : }
454 75 : void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
455 : {
456 75 : PDFDict* pDict = NULL;
457 75 : if( m_aObjectStack.empty() )
458 0 : parseError( "dictionary end without begin", first );
459 75 : else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
460 0 : parseError( "spurious dictionary end", first );
461 : else
462 75 : m_aObjectStack.pop_back();
463 :
464 75 : PDFEntry* pOffender = pDict->buildMap();
465 75 : if( pOffender )
466 : {
467 0 : StringEmitContext aCtx;
468 0 : aCtx.write( "offending dictionary element: ", 30 );
469 0 : pOffender->emit( aCtx );
470 0 : m_aErrorString = aCtx.getString();
471 0 : parseError( m_aErrorString.getStr(), first );
472 : }
473 75 : }
474 :
475 45 : void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
476 : {
477 45 : PDFArray* pArray = new PDFArray();
478 45 : pArray->m_nOffset = first - m_aGlobalBegin;
479 :
480 45 : insertNewValue( pArray, first );
481 : // will not come here if insertion fails (exception)
482 45 : m_aObjectStack.push_back( pArray );
483 45 : }
484 :
485 45 : void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
486 : {
487 45 : if( m_aObjectStack.empty() )
488 0 : parseError( "array end without begin", first );
489 45 : else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
490 0 : parseError( "spurious array end", first );
491 : else
492 45 : m_aObjectStack.pop_back();
493 45 : }
494 :
495 24 : void emitStream( iteratorT first, iteratorT last )
496 : {
497 24 : if( m_aObjectStack.empty() )
498 0 : parseError( "stream without object", first );
499 24 : PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
500 24 : if( pObj && pObj->m_pObject )
501 : {
502 24 : if( pObj->m_pStream )
503 0 : parseError( "multiple streams in object", first );
504 :
505 24 : PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
506 24 : if( pDict )
507 : {
508 24 : PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
509 :
510 24 : pObj->m_pStream = pStream;
511 24 : pObj->m_aSubElements.push_back( pStream );
512 : }
513 : }
514 : else
515 0 : parseError( "stream without object", first );
516 24 : }
517 :
518 3 : void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
519 : {
520 3 : if( m_aObjectStack.empty() )
521 0 : m_aObjectStack.push_back( new PDFPart() );
522 :
523 3 : PDFTrailer* pTrailer = new PDFTrailer();
524 3 : pTrailer->m_nOffset = first - m_aGlobalBegin;
525 :
526 3 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
527 3 : if( pContainer &&
528 : ( dynamic_cast<PDFFile*>(pContainer) ||
529 : dynamic_cast<PDFPart*>(pContainer) ) )
530 : {
531 3 : pContainer->m_aSubElements.push_back( pTrailer );
532 3 : m_aObjectStack.push_back( pTrailer );
533 : }
534 : else
535 0 : parseError( "trailer in wrong place", first );
536 3 : }
537 :
538 3 : void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
539 : {
540 3 : if( m_aObjectStack.empty() )
541 0 : parseError( "%%EOF without trailer", first );
542 3 : else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
543 0 : parseError( "spurious %%EOF", first );
544 : else
545 3 : m_aObjectStack.pop_back();
546 3 : }
547 : };
548 :
549 : #ifdef WIN32
550 : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
551 : {
552 : PDFGrammar<const char*> aGrammar( pBuffer );
553 :
554 : try
555 : {
556 : #if OSL_DEBUG_LEVEL > 1
557 : boost::spirit::parse_info<const char*> aInfo =
558 : #endif
559 : boost::spirit::parse( pBuffer,
560 : pBuffer+nLen,
561 : aGrammar,
562 : boost::spirit::space_p );
563 : #if OSL_DEBUG_LEVEL > 1
564 : fprintf( stderr, "parseinfo: stop = %p (buff=%p, offset = %d), hit = %s, full = %s, length = %d\n",
565 : aInfo.stop, pBuffer, aInfo.stop - pBuffer,
566 : aInfo.hit ? "true" : "false",
567 : aInfo.full ? "true" : "false",
568 : (int)aInfo.length );
569 : #endif
570 : }
571 : catch( const parser_error<const char*, const char*>& rError )
572 : {
573 : #if OSL_DEBUG_LEVEL > 1
574 : fprintf( stderr, "parse error: %s at buffer pos %u\nobject stack:\n",
575 : rError.descriptor, rError.where - pBuffer );
576 : unsigned int nElem = aGrammar.m_aObjectStack.size();
577 : for( unsigned int i = 0; i < nElem; i++ )
578 : {
579 : fprintf( stderr, " %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
580 : }
581 : #endif
582 : }
583 :
584 : PDFEntry* pRet = NULL;
585 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
586 : if( nEntries == 1 )
587 : {
588 : pRet = aGrammar.m_aObjectStack.back();
589 : aGrammar.m_aObjectStack.pop_back();
590 : }
591 : #if OSL_DEBUG_LEVEL > 1
592 : else if( nEntries > 1 )
593 : fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
594 : #endif
595 :
596 : return pRet;
597 : }
598 : #endif
599 :
600 3 : PDFEntry* PDFReader::read( const char* pFileName )
601 : {
602 : #ifdef WIN32
603 : /* #i106583#
604 : since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
605 : C++ stdlib istream_iterator does not allow "-" apparently
606 : using spirit 2.0 doesn't work in our environment with the MSC
607 :
608 : So for the time being bite the bullet and read the whole file.
609 : FIXME: give Spirit 2.x another try when we upgrade boost again.
610 : */
611 : PDFEntry* pRet = NULL;
612 : FILE* fp = fopen( pFileName, "rb" );
613 : if( fp )
614 : {
615 : fseek( fp, 0, SEEK_END );
616 : unsigned int nLen = (unsigned int)ftell( fp );
617 : fseek( fp, 0, SEEK_SET );
618 : char* pBuf = (char*)rtl_allocateMemory( nLen );
619 : if( pBuf )
620 : {
621 : fread( pBuf, 1, nLen, fp );
622 : pRet = read( pBuf, nLen );
623 : rtl_freeMemory( pBuf );
624 : }
625 : fclose( fp );
626 : }
627 : return pRet;
628 : #else
629 3 : file_iterator<> file_start( pFileName );
630 3 : if( ! file_start )
631 0 : return NULL;
632 3 : file_iterator<> file_end = file_start.make_end();
633 3 : PDFGrammar< file_iterator<> > aGrammar( file_start );
634 :
635 : try
636 : {
637 : #if OSL_DEBUG_LEVEL > 1
638 : boost::spirit::parse_info< file_iterator<> > aInfo =
639 : #endif
640 : boost::spirit::parse( file_start,
641 : file_end,
642 : aGrammar,
643 3 : boost::spirit::space_p );
644 : #if OSL_DEBUG_LEVEL > 1
645 : fprintf( stderr, "parseinfo: stop at offset = %ld, hit = %s, full = %s, length = %lu\n",
646 : aInfo.stop - file_start,
647 : aInfo.hit ? "true" : "false",
648 : aInfo.full ? "true" : "false",
649 : aInfo.length );
650 : #endif
651 : }
652 0 : catch( const parser_error< const char*, file_iterator<> >& rError )
653 : {
654 : #if OSL_DEBUG_LEVEL > 1
655 : fprintf( stderr, "parse error: %s at buffer pos %lu\nobject stack:\n",
656 : rError.descriptor, rError.where - file_start );
657 : size_t nElem = aGrammar.m_aObjectStack.size();
658 : for( size_t i = 0; i < nElem; ++i )
659 : {
660 : fprintf( stderr, " %s\n", typeid( *(aGrammar.m_aObjectStack[i]) ).name() );
661 : }
662 : #endif
663 : }
664 :
665 3 : PDFEntry* pRet = NULL;
666 3 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
667 3 : if( nEntries == 1 )
668 : {
669 3 : pRet = aGrammar.m_aObjectStack.back();
670 3 : aGrammar.m_aObjectStack.pop_back();
671 : }
672 : #if OSL_DEBUG_LEVEL > 1
673 : else if( nEntries > 1 )
674 : {
675 : fprintf( stderr, "error got %u stack objects in parse\n", nEntries );
676 : for( unsigned int i = 0; i < nEntries; i++ )
677 : {
678 : fprintf( stderr, "%s\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
679 : PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
680 : if( pObj )
681 : fprintf( stderr, " -> object %d generation %d\n", pObj->m_nNumber, pObj->m_nGeneration );
682 : else
683 : fprintf( stderr, "(type %s)\n", typeid(*aGrammar.m_aObjectStack[i]).name() );
684 : }
685 : }
686 : #endif
687 3 : return pRet;
688 : #endif // WIN32
689 3 : }
690 :
691 : #if defined __SUNPRO_CC
692 : #pragma enable_warn
693 : #elif defined _MSC_VER
694 : #pragma warning(pop)
695 : #endif
696 :
697 :
698 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|