Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #if defined __SUNPRO_CC
22 : #pragma disable_warn
23 : #elif defined _MSC_VER
24 : #pragma warning(push, 1)
25 : #endif
26 :
27 : #include "pdfparse.hxx"
28 :
29 : // workaround windows compiler: do not include multi_pass.hpp
30 : #include <boost/spirit/include/classic_core.hpp>
31 : #include <boost/spirit/include/classic_utility.hpp>
32 : #include <boost/spirit/include/classic_error_handling.hpp>
33 : #include <boost/spirit/include/classic_file_iterator.hpp>
34 : #include <boost/bind.hpp>
35 : #include <string.h>
36 :
37 : #include <rtl/strbuf.hxx>
38 : #include <rtl/alloc.h>
39 :
40 : // disable warnings again because someone along the line has enabled them
41 : #if defined __SUNPRO_CC
42 : #pragma disable_warn
43 : #elif defined _MSC_VER
44 : #pragma warning(push, 1)
45 : #endif
46 :
47 : using namespace boost::spirit;
48 : using namespace pdfparse;
49 :
50 :
51 : class StringEmitContext : public EmitContext
52 : {
53 : OStringBuffer m_aBuf;
54 : public:
55 0 : StringEmitContext() : EmitContext(), m_aBuf(256) {}
56 0 : virtual ~StringEmitContext() {}
57 0 : virtual bool write( const void* pBuf, unsigned int nLen ) throw()
58 : {
59 0 : m_aBuf.append( (const sal_Char*)pBuf, nLen );
60 0 : return true;
61 : }
62 0 : virtual unsigned int getCurPos() throw() { return m_aBuf.getLength(); }
63 0 : virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
64 0 : { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
65 0 : write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
66 0 : virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
67 : {
68 0 : if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
69 : {
70 0 : memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
71 0 : return nLen;
72 : }
73 0 : return 0;
74 : }
75 :
76 0 : OString getString() { return m_aBuf.makeStringAndClear(); }
77 : };
78 :
79 : template< class iteratorT >
80 : class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
81 : {
82 : public:
83 :
84 3 : PDFGrammar( const iteratorT& first )
85 3 : : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
86 3 : ~PDFGrammar()
87 : {
88 3 : if( !m_aObjectStack.empty() )
89 0 : delete m_aObjectStack.front();
90 3 : }
91 :
92 : double m_fDouble;
93 : std::vector< unsigned int > m_aUIntStack;
94 : std::vector< PDFEntry* > m_aObjectStack;
95 : OString m_aErrorString;
96 : iteratorT m_aGlobalBegin;
97 :
98 : public:
99 : struct pdf_string_parser
100 : {
101 : typedef nil_t result_t;
102 : template <typename ScannerT>
103 : std::ptrdiff_t
104 6 : operator()(ScannerT const& scan, result_t&) const
105 : {
106 6 : std::ptrdiff_t len = 0;
107 :
108 6 : int nBraceLevel = 0;
109 174 : while( ! scan.at_end() )
110 : {
111 168 : char c = *scan;
112 168 : if( c == ')' )
113 : {
114 6 : nBraceLevel--;
115 6 : if( nBraceLevel < 0 )
116 6 : break;
117 : }
118 162 : else if( c == '(' )
119 0 : nBraceLevel++;
120 162 : else if( c == '\\' ) // ignore escaped braces
121 : {
122 0 : ++len;
123 0 : ++scan;
124 0 : if( scan.at_end() )
125 0 : break;
126 : }
127 162 : ++len;
128 162 : ++scan;
129 : }
130 6 : return scan.at_end() ? -1 : len;
131 : }
132 : };
133 :
// Rule set of the grammar for one scanner type. The constructor wires every
// rule to the semantic action (a PDFGrammar member function) that is called
// with the begin/end iterators of the matched text; start() names pdfrule as
// the entry rule.
134 : template< typename ScannerT >
135 3 : struct definition
136 : {
137 3 : definition( const PDFGrammar<iteratorT>& rSelf )
138 3 : {
// The grammar is handed in as const, but the bound semantic actions are
// non-const member functions, hence the const_cast.
139 3 : PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
140 :
141 : // workaround workshop compiler: comment_p doesn't work
142 : // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
// comment: '%' followed by anything that is neither CR nor LF, up to and
// including the end of line.
143 3 : comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
144 :
// boolean: the keywords "true" and "false".
145 3 : boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
146 :
147 : // workaround workshop compiler: confix_p doesn't work
148 : //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
// stream: everything from "stream" through the first following "endstream";
// the action receives the whole match including both keywords.
149 3 : stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
150 :
// name: '/' followed by characters that are neither whitespace, one of the
// listed delimiter characters, nor NUL. The action is attached to the part
// after the slash only.
151 3 : name = lexeme_d[
152 : ch_p('/')
153 : >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
154 3 : [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
155 :
156 : // workaround workshop compiler: confix_p doesn't work
157 : //stringtype = ( confix_p("(",*anychar_p, ")") |
158 : // confix_p("<",*xdigit_p, ">") )
159 : // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
160 :
// stringtype: either "( ... )" with the body matched by pdf_string_parser, or
// "< hex digits >". The action receives the match including the delimiters.
161 9 : stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
162 6 : ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
163 12 : [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
164 :
// null_object: the keyword "null".
165 3 : null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
166 :
// objectref: two unsigned integers followed by 'R'. Both numbers are pushed
// onto m_aUIntStack as they are matched; pushObjectRef pops them again.
// The two preprocessor branches differ only in how the numbers are pushed.
167 : #ifdef USE_ASSIGN_ACTOR
168 : objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
169 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
170 : >> ch_p('R')
171 : >> eps_p
172 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
173 : #else
174 9 : objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
175 12 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
176 12 : >> ch_p('R')
177 6 : >> eps_p
178 12 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
179 : #endif
180 :
// simple_type: the non-container values. objectref is tried before the plain
// number alternative; a number is stored in m_fDouble and then handed on by
// pushDouble.
181 : #ifdef USE_ASSIGN_ACTOR
182 : simple_type = objectref | name |
183 : ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
184 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
185 : | stringtype | boolean | null_object;
186 : #else
187 9 : simple_type = objectref | name |
188 6 : ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
189 12 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
190 9 : | stringtype | boolean | null_object;
191 : #endif
192 :
// Dictionary and array delimiters are separate rules, each with its own action.
193 3 : dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
194 3 : dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
195 :
196 3 : array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
197 3 : array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
198 :
// object_begin: "<number> <generation> obj"; both integers go to m_aUIntStack
// and the beginObject action is attached to the "obj" keyword.
199 : #ifdef USE_ASSIGN_ACTOR
200 : object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
201 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
202 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
203 : #else
204 9 : object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
205 12 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
206 12 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
207 : #endif
208 3 : object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
209 :
// xref: "xref", two unsigned integers, then one or more entries of the form
// 10 digits, blank, 5 digits, blank, 'n' or 'f', two whitespace characters.
// No semantic action is attached, so the table is matched but not stored.
210 9 : xref = str_p( "xref" ) >> uint_p >> uint_p
211 12 : >> lexeme_d[
212 6 : +( repeat_p(10)[digit_p]
213 6 : >> blank_p
214 12 : >> repeat_p(5)[digit_p]
215 6 : >> blank_p
216 12 : >> ( ch_p('n') | ch_p('f') )
217 12 : >> repeat_p(2)[space_p]
218 : ) ];
219 :
// dict_element: any single token that may appear in an object or trailer
// body. Nesting is not expressed in the grammar; the begin/end actions
// track it on m_aObjectStack.
220 9 : dict_element= dict_begin | comment | simple_type
221 9 : | array_begin | array_end | dict_end;
222 :
// object: header, any number of elements, an optional stream, "endobj".
223 3 : object = object_begin
224 6 : >> *dict_element
225 12 : >> !stream
226 9 : >> object_end;
227 :
// trailer: "trailer", any number of elements, "startxref", an unsigned
// integer and finally "%%EOF".
228 9 : trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
229 12 : >> *dict_element
230 12 : >> str_p("startxref")
231 6 : >> uint_p
232 12 : >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
233 :
// pdfrule (entry rule): an optional "%PDF-<major>.<minor>" header line whose
// two numbers are pushed onto m_aUIntStack for haveFile, followed by any
// number of comments, objects and xref/trailer pairs.
234 : #ifdef USE_ASSIGN_ACTOR
235 : pdfrule = ! (lexeme_d[
236 : str_p( "%PDF-" )
237 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
238 : >> ch_p('.')
239 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
240 : >> *((~ch_p('\r') & ~ch_p('\n')))
241 : >> eol_p
242 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
243 : >> *( comment | object | ( xref >> trailer ) );
244 : #else
245 15 : pdfrule = ! (lexeme_d[
246 : str_p( "%PDF-" )
247 6 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
248 12 : >> ch_p('.')
249 12 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
250 12 : >> *((~ch_p('\r') & ~ch_p('\n')))
251 6 : >> eol_p
252 12 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
253 12 : >> *( comment | object | ( xref >> trailer ) );
254 : #endif
255 3 : }
// NOTE(review): the rules "array" and "value" are declared here but are not
// assigned anywhere in the constructor above.
256 : rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
257 : objectref, array, value, dict_element, dict_begin, dict_end,
258 : array_begin, array_end, object, object_begin, object_end,
259 : xref, trailer, pdfrule;
260 :
// Entry point used by the grammar framework: parsing starts at pdfrule.
261 3 : const rule< ScannerT >& start() const { return pdfrule; }
262 : };
263 :
264 : #ifndef USE_ASSIGN_ACTOR
265 711 : void push_back_action_uint( unsigned int i )
266 : {
267 711 : m_aUIntStack.push_back( i );
268 711 : }
269 291 : void assign_action_double( double d )
270 : {
271 291 : m_fDouble = d;
272 291 : }
273 : #endif
274 :
275 0 : void parseError( const char* pMessage, iteratorT pLocation )
276 : {
277 0 : throw_( pLocation, pMessage );
278 0 : }
279 :
280 507 : OString iteratorToString( iteratorT first, iteratorT last ) const
281 : {
282 507 : OStringBuffer aStr( 32 );
283 5214 : while( first != last )
284 : {
285 4200 : aStr.append( *first );
286 4200 : ++first;
287 : }
288 507 : return aStr.makeStringAndClear();
289 : }
290 :
291 3 : void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
292 : {
293 3 : if( m_aObjectStack.empty() )
294 : {
295 3 : PDFFile* pFile = new PDFFile();
296 3 : pFile->m_nMinor = m_aUIntStack.back();
297 3 : m_aUIntStack.pop_back();
298 3 : pFile->m_nMajor = m_aUIntStack.back();
299 3 : m_aUIntStack.pop_back();
300 3 : m_aObjectStack.push_back( pFile );
301 : }
302 : else
303 0 : parseError( "found file header in unusual place", pBegin );
304 3 : }
305 :
306 3 : void pushComment( iteratorT first, iteratorT last )
307 : {
308 : // add a comment to the current stack element
309 : PDFComment* pComment =
310 3 : new PDFComment(iteratorToString(first,last));
311 3 : if( m_aObjectStack.empty() )
312 0 : m_aObjectStack.push_back( new PDFPart() );
313 3 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
314 3 : if( pContainer == NULL )
315 0 : parseError( "comment without container", first );
316 3 : pContainer->m_aSubElements.push_back( pComment );
317 3 : }
318 :
319 1008 : void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
320 : {
321 1008 : PDFContainer* pContainer = NULL;
322 1008 : const char* pMsg = NULL;
323 2016 : if( ! m_aObjectStack.empty() &&
324 1008 : (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
325 : {
326 1296 : if( dynamic_cast<PDFDict*>(pContainer) == NULL &&
327 288 : dynamic_cast<PDFArray*>(pContainer) == NULL )
328 : {
329 78 : PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
330 78 : if( pObj )
331 : {
332 75 : if( pObj->m_pObject == NULL )
333 75 : pObj->m_pObject = pNewValue;
334 : else
335 : {
336 0 : pMsg = "second value for object";
337 0 : pContainer = NULL;
338 : }
339 : }
340 3 : else if( dynamic_cast<PDFDict*>(pNewValue) )
341 : {
342 3 : PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
343 3 : if( pTrailer )
344 : {
345 3 : if( pTrailer->m_pDict == NULL )
346 3 : pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
347 : else
348 0 : pContainer = NULL;
349 : }
350 : else
351 0 : pContainer = NULL;
352 : }
353 : else
354 0 : pContainer = NULL;
355 : }
356 : }
357 1008 : if( pContainer )
358 1008 : pContainer->m_aSubElements.push_back( pNewValue );
359 : else
360 : {
361 0 : if( ! pMsg )
362 : {
363 0 : if( dynamic_cast<PDFContainer*>(pNewValue) )
364 0 : pMsg = "array without container";
365 : else
366 0 : pMsg = "value without container";
367 : }
368 0 : delete pNewValue;
369 0 : parseError( pMsg, pPos );
370 : }
371 1008 : }
372 :
373 486 : void pushName( iteratorT first, iteratorT last )
374 : {
375 486 : insertNewValue( new PDFName(iteratorToString(first,last)), first );
376 486 : }
377 :
378 291 : void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
379 : {
380 291 : insertNewValue( new PDFNumber(m_fDouble), first );
381 291 : }
382 :
383 18 : void pushString( iteratorT first, iteratorT last )
384 : {
385 18 : insertNewValue( new PDFString(iteratorToString(first,last)), first );
386 18 : }
387 :
388 3 : void pushBool( iteratorT first, iteratorT last )
389 : {
390 3 : insertNewValue( new PDFBool( (last-first == 4) ), first );
391 3 : }
392 :
393 6 : void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
394 : {
395 6 : insertNewValue( new PDFNull(), first );
396 6 : }
397 :
398 :
399 75 : void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
400 : {
401 75 : if( m_aObjectStack.empty() )
402 0 : m_aObjectStack.push_back( new PDFPart() );
403 :
404 75 : unsigned int nGeneration = m_aUIntStack.back();
405 75 : m_aUIntStack.pop_back();
406 75 : unsigned int nObject = m_aUIntStack.back();
407 75 : m_aUIntStack.pop_back();
408 :
409 75 : PDFObject* pObj = new PDFObject( nObject, nGeneration );
410 75 : pObj->m_nOffset = first - m_aGlobalBegin;
411 :
412 75 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
413 225 : if( pContainer &&
414 150 : ( dynamic_cast<PDFFile*>(pContainer) ||
415 : dynamic_cast<PDFPart*>(pContainer) ) )
416 : {
417 75 : pContainer->m_aSubElements.push_back( pObj );
418 75 : m_aObjectStack.push_back( pObj );
419 : }
420 : else
421 0 : parseError( "object in wrong place", first );
422 75 : }
423 :
424 75 : void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
425 : {
426 75 : if( m_aObjectStack.empty() )
427 0 : parseError( "endobj without obj", first );
428 75 : else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
429 0 : parseError( "spurious endobj", first );
430 : else
431 75 : m_aObjectStack.pop_back();
432 75 : }
433 :
434 84 : void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
435 : {
436 84 : unsigned int nGeneration = m_aUIntStack.back();
437 84 : m_aUIntStack.pop_back();
438 84 : unsigned int nObject = m_aUIntStack.back();
439 84 : m_aUIntStack.pop_back();
440 84 : insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
441 84 : }
442 :
443 75 : void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
444 : {
445 75 : PDFDict* pDict = new PDFDict();
446 75 : pDict->m_nOffset = first - m_aGlobalBegin;
447 :
448 75 : insertNewValue( pDict, first );
449 : // will not come here if insertion fails (exception)
450 75 : m_aObjectStack.push_back( pDict );
451 75 : }
452 75 : void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
453 : {
454 75 : PDFDict* pDict = NULL;
455 75 : if( m_aObjectStack.empty() )
456 0 : parseError( "dictionary end without begin", first );
457 75 : else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
458 0 : parseError( "spurious dictionary end", first );
459 : else
460 75 : m_aObjectStack.pop_back();
461 :
462 75 : PDFEntry* pOffender = pDict->buildMap();
463 75 : if( pOffender )
464 : {
465 0 : StringEmitContext aCtx;
466 0 : aCtx.write( "offending dictionary element: ", 30 );
467 0 : pOffender->emit( aCtx );
468 0 : m_aErrorString = aCtx.getString();
469 0 : parseError( m_aErrorString.getStr(), first );
470 : }
471 75 : }
472 :
473 45 : void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
474 : {
475 45 : PDFArray* pArray = new PDFArray();
476 45 : pArray->m_nOffset = first - m_aGlobalBegin;
477 :
478 45 : insertNewValue( pArray, first );
479 : // will not come here if insertion fails (exception)
480 45 : m_aObjectStack.push_back( pArray );
481 45 : }
482 :
483 45 : void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
484 : {
485 45 : if( m_aObjectStack.empty() )
486 0 : parseError( "array end without begin", first );
487 45 : else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
488 0 : parseError( "spurious array end", first );
489 : else
490 45 : m_aObjectStack.pop_back();
491 45 : }
492 :
493 24 : void emitStream( iteratorT first, iteratorT last )
494 : {
495 24 : if( m_aObjectStack.empty() )
496 0 : parseError( "stream without object", first );
497 24 : PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
498 24 : if( pObj && pObj->m_pObject )
499 : {
500 24 : if( pObj->m_pStream )
501 0 : parseError( "multiple streams in object", first );
502 :
503 24 : PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
504 24 : if( pDict )
505 : {
506 24 : PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
507 :
508 24 : pObj->m_pStream = pStream;
509 24 : pObj->m_aSubElements.push_back( pStream );
510 24 : }
511 : }
512 : else
513 0 : parseError( "stream without object", first );
514 24 : }
515 :
516 3 : void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
517 : {
518 3 : if( m_aObjectStack.empty() )
519 0 : m_aObjectStack.push_back( new PDFPart() );
520 :
521 3 : PDFTrailer* pTrailer = new PDFTrailer();
522 3 : pTrailer->m_nOffset = first - m_aGlobalBegin;
523 :
524 3 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
525 9 : if( pContainer &&
526 6 : ( dynamic_cast<PDFFile*>(pContainer) ||
527 : dynamic_cast<PDFPart*>(pContainer) ) )
528 : {
529 3 : pContainer->m_aSubElements.push_back( pTrailer );
530 3 : m_aObjectStack.push_back( pTrailer );
531 : }
532 : else
533 0 : parseError( "trailer in wrong place", first );
534 3 : }
535 :
536 3 : void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
537 : {
538 3 : if( m_aObjectStack.empty() )
539 0 : parseError( "%%EOF without trailer", first );
540 3 : else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
541 0 : parseError( "spurious %%EOF", first );
542 : else
543 3 : m_aObjectStack.pop_back();
544 3 : }
545 : };
546 :
547 : #ifdef WIN32
548 : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
549 : {
550 : PDFGrammar<const char*> aGrammar( pBuffer );
551 :
552 : try
553 : {
554 : #if OSL_DEBUG_LEVEL > 1
555 : boost::spirit::parse_info<const char*> aInfo =
556 : #endif
557 : boost::spirit::parse( pBuffer,
558 : pBuffer+nLen,
559 : aGrammar,
560 : boost::spirit::space_p );
561 : #if OSL_DEBUG_LEVEL > 1
562 : SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << aInfo.hit ? "true" : "false" << ", full = " << aInfo.full ? "true" : "false" << ", length = " << (int)aInfo.length );
563 : #endif
564 : }
565 : catch( const parser_error<const char*, const char*>& rError )
566 : {
567 : #if OSL_DEBUG_LEVEL > 1
568 : OUString aTmp;
569 : unsigned int nElem = aGrammar.m_aObjectStack.size()
570 : for( unsigned int i = 0; i < nElem; i++ )
571 : aTmp += " " + OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
572 :
573 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
574 : #endif
575 : }
576 :
577 : PDFEntry* pRet = NULL;
578 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
579 : if( nEntries == 1 )
580 : {
581 : pRet = aGrammar.m_aObjectStack.back();
582 : aGrammar.m_aObjectStack.pop_back();
583 : }
584 : #if OSL_DEBUG_LEVEL > 1
585 : else if( nEntries > 1 )
586 : SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
587 : #endif
588 :
589 : return pRet;
590 : }
591 : #endif
592 :
593 3 : PDFEntry* PDFReader::read( const char* pFileName )
594 : {
595 : #ifdef WIN32
596 : /* #i106583#
597 : since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
598 : C++ stdlib istream_iterator does not allow "-" apparently
599 : using spirit 2.0 doesn't work in our environment with the MSC
600 :
601 : So for the time being bite the bullet and read the whole file.
602 : FIXME: give Spirit 2.x another try when we upgrade boost again.
603 : */
604 : PDFEntry* pRet = NULL;
605 : FILE* fp = fopen( pFileName, "rb" );
606 : if( fp )
607 : {
608 : fseek( fp, 0, SEEK_END );
609 : unsigned int nLen = (unsigned int)ftell( fp );
610 : fseek( fp, 0, SEEK_SET );
611 : char* pBuf = (char*)rtl_allocateMemory( nLen );
612 : if( pBuf )
613 : {
614 : fread( pBuf, 1, nLen, fp );
615 : pRet = read( pBuf, nLen );
616 : rtl_freeMemory( pBuf );
617 : }
618 : fclose( fp );
619 : }
620 : return pRet;
621 : #else
622 3 : file_iterator<> file_start( pFileName );
623 3 : if( ! file_start )
624 0 : return NULL;
625 6 : file_iterator<> file_end = file_start.make_end();
626 6 : PDFGrammar< file_iterator<> > aGrammar( file_start );
627 :
628 : try
629 : {
630 : #if OSL_DEBUG_LEVEL > 1
631 : boost::spirit::parse_info< file_iterator<> > aInfo =
632 : #endif
633 : boost::spirit::parse( file_start,
634 : file_end,
635 : aGrammar,
636 3 : boost::spirit::space_p );
637 : #if OSL_DEBUG_LEVEL > 1
638 : SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
639 : #endif
640 : }
641 0 : catch( const parser_error< const char*, file_iterator<> >& rError )
642 : {
643 : #if OSL_DEBUG_LEVEL > 1
644 : OUString aTmp;
645 : unsigned int nElem = aGrammar.m_aObjectStack.size();
646 : for( unsigned int i = 0; i < nElem; i++ )
647 : {
648 : aTmp += " ";
649 : aTmp += OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
650 : strlen(typeid( *(aGrammar.m_aObjectStack[i]) ).name()),
651 : RTL_TEXTENCODING_ASCII_US);
652 : }
653 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start << ", object stack: " << aTmp);
654 : #endif
655 : }
656 :
657 3 : PDFEntry* pRet = NULL;
658 3 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
659 3 : if( nEntries == 1 )
660 : {
661 3 : pRet = aGrammar.m_aObjectStack.back();
662 3 : aGrammar.m_aObjectStack.pop_back();
663 : }
664 : #if OSL_DEBUG_LEVEL > 1
665 : else if( nEntries > 1 )
666 : {
667 : SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
668 : for( unsigned int i = 0; i < nEntries; i++ )
669 : {
670 : SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
671 : PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
672 : if( pObj )
673 : SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
674 : else
675 : SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
676 : }
677 : }
678 : #endif
679 6 : return pRet;
680 : #endif // WIN32
681 3 : }
682 :
683 : #if defined __SUNPRO_CC
684 : #pragma enable_warn
685 : #elif defined _MSC_VER
686 : #pragma warning(pop)
687 : #endif
688 :
689 :
690 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|