Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #if defined _MSC_VER
22 : #pragma warning(push, 1)
23 : #endif
24 :
25 : #include "pdfparse.hxx"
26 :
27 : // workaround windows compiler: do not include multi_pass.hpp
28 : #include <boost/spirit/include/classic_core.hpp>
29 : #include <boost/spirit/include/classic_utility.hpp>
30 : #include <boost/spirit/include/classic_error_handling.hpp>
31 : #include <boost/spirit/include/classic_file_iterator.hpp>
32 : #include <boost/bind.hpp>
33 : #include <string.h>
34 :
35 : #include <rtl/strbuf.hxx>
36 : #include <rtl/alloc.h>
37 :
38 : // disable warnings again because someone along the line has enabled them
39 : #if defined _MSC_VER
40 : #pragma warning(push, 1)
41 : #endif
42 :
43 : using namespace boost::spirit;
44 : using namespace pdfparse;
45 :
46 :
47 : class StringEmitContext : public EmitContext
48 : {
49 : OStringBuffer m_aBuf;
50 : public:
51 0 : StringEmitContext() : EmitContext(), m_aBuf(256) {}
52 0 : virtual ~StringEmitContext() {}
53 0 : virtual bool write( const void* pBuf, unsigned int nLen ) throw() SAL_OVERRIDE
54 : {
55 0 : m_aBuf.append( (const sal_Char*)pBuf, nLen );
56 0 : return true;
57 : }
58 0 : virtual unsigned int getCurPos() throw() SAL_OVERRIDE { return m_aBuf.getLength(); }
59 0 : virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() SAL_OVERRIDE
60 0 : { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) ?
61 0 : write( m_aBuf.getStr() + nOrigOffset, nLen ) : false; }
62 0 : virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() SAL_OVERRIDE
63 : {
64 0 : if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
65 : {
66 0 : memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
67 0 : return nLen;
68 : }
69 0 : return 0;
70 : }
71 :
72 0 : OString getString() { return m_aBuf.makeStringAndClear(); }
73 : };
74 :
75 : template< class iteratorT >
76 : class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
77 : {
78 : public:
79 :
80 3 : PDFGrammar( const iteratorT& first )
81 3 : : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
82 3 : ~PDFGrammar()
83 : {
84 3 : if( !m_aObjectStack.empty() )
85 0 : delete m_aObjectStack.front();
86 3 : }
87 :
88 : double m_fDouble;
89 : std::vector< unsigned int > m_aUIntStack;
90 : std::vector< PDFEntry* > m_aObjectStack;
91 : OString m_aErrorString;
92 : iteratorT m_aGlobalBegin;
93 :
94 : public:
95 : struct pdf_string_parser
96 : {
97 : typedef nil_t result_t;
98 : template <typename ScannerT>
99 : std::ptrdiff_t
100 6 : operator()(ScannerT const& scan, result_t&) const
101 : {
102 6 : std::ptrdiff_t len = 0;
103 :
104 6 : int nBraceLevel = 0;
105 174 : while( ! scan.at_end() )
106 : {
107 168 : char c = *scan;
108 168 : if( c == ')' )
109 : {
110 6 : nBraceLevel--;
111 6 : if( nBraceLevel < 0 )
112 6 : break;
113 : }
114 162 : else if( c == '(' )
115 0 : nBraceLevel++;
116 162 : else if( c == '\\' ) // ignore escaped braces
117 : {
118 0 : ++len;
119 0 : ++scan;
120 0 : if( scan.at_end() )
121 0 : break;
122 : }
123 162 : ++len;
124 162 : ++scan;
125 : }
126 6 : return scan.at_end() ? -1 : len;
127 : }
128 : };
129 :
130 : template< typename ScannerT >
131 3 : struct definition
132 : {
133 3 : definition( const PDFGrammar<iteratorT>& rSelf )
134 3 : {
135 3 : PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
136 :
137 : // workaround workshop compiler: comment_p doesn't work
138 : // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
139 3 : comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
140 :
141 3 : boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
142 :
143 : // workaround workshop compiler: confix_p doesn't work
144 : //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
145 3 : stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
146 :
147 3 : name = lexeme_d[
148 : ch_p('/')
149 : >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
150 3 : [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
151 :
152 : // workaround workshop compiler: confix_p doesn't work
153 : //stringtype = ( confix_p("(",*anychar_p, ")") |
154 : // confix_p("<",*xdigit_p, ">") )
155 : // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
156 :
157 9 : stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
158 6 : ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
159 12 : [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
160 :
161 3 : null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
162 :
163 : #ifdef USE_ASSIGN_ACTOR
164 : objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
165 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
166 : >> ch_p('R')
167 : >> eps_p
168 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
169 : #else
170 9 : objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
171 12 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
172 12 : >> ch_p('R')
173 6 : >> eps_p
174 12 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
175 : #endif
176 :
177 : #ifdef USE_ASSIGN_ACTOR
178 : simple_type = objectref | name |
179 : ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
180 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
181 : | stringtype | boolean | null_object;
182 : #else
183 9 : simple_type = objectref | name |
184 6 : ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
185 12 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
186 9 : | stringtype | boolean | null_object;
187 : #endif
188 :
189 3 : dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
190 3 : dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
191 :
192 3 : array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
193 3 : array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
194 :
195 : #ifdef USE_ASSIGN_ACTOR
196 : object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
197 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
198 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
199 : #else
200 9 : object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
201 12 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
202 12 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
203 : #endif
204 3 : object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
205 :
206 9 : xref = str_p( "xref" ) >> uint_p >> uint_p
207 12 : >> lexeme_d[
208 6 : +( repeat_p(10)[digit_p]
209 6 : >> blank_p
210 12 : >> repeat_p(5)[digit_p]
211 6 : >> blank_p
212 12 : >> ( ch_p('n') | ch_p('f') )
213 12 : >> repeat_p(2)[space_p]
214 : ) ];
215 :
216 9 : dict_element= dict_begin | comment | simple_type
217 9 : | array_begin | array_end | dict_end;
218 :
219 3 : object = object_begin
220 6 : >> *dict_element
221 12 : >> !stream
222 9 : >> object_end;
223 :
224 9 : trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
225 12 : >> *dict_element
226 12 : >> str_p("startxref")
227 6 : >> uint_p
228 12 : >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
229 :
230 : #ifdef USE_ASSIGN_ACTOR
231 : pdfrule = ! (lexeme_d[
232 : str_p( "%PDF-" )
233 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
234 : >> ch_p('.')
235 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
236 : >> *((~ch_p('\r') & ~ch_p('\n')))
237 : >> eol_p
238 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
239 : >> *( comment | object | ( xref >> trailer ) );
240 : #else
241 15 : pdfrule = ! (lexeme_d[
242 : str_p( "%PDF-" )
243 6 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
244 12 : >> ch_p('.')
245 12 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
246 12 : >> *((~ch_p('\r') & ~ch_p('\n')))
247 6 : >> eol_p
248 12 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
249 12 : >> *( comment | object | ( xref >> trailer ) );
250 : #endif
251 3 : }
252 : rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
253 : objectref, array, value, dict_element, dict_begin, dict_end,
254 : array_begin, array_end, object, object_begin, object_end,
255 : xref, trailer, pdfrule;
256 :
257 3 : const rule< ScannerT >& start() const { return pdfrule; }
258 : };
259 :
260 : #ifndef USE_ASSIGN_ACTOR
261 711 : void push_back_action_uint( unsigned int i )
262 : {
263 711 : m_aUIntStack.push_back( i );
264 711 : }
265 291 : void assign_action_double( double d )
266 : {
267 291 : m_fDouble = d;
268 291 : }
269 : #endif
270 :
271 0 : void parseError( const char* pMessage, iteratorT pLocation )
272 : {
273 0 : throw_( pLocation, pMessage );
274 0 : }
275 :
276 507 : OString iteratorToString( iteratorT first, iteratorT last ) const
277 : {
278 507 : OStringBuffer aStr( 32 );
279 5214 : while( first != last )
280 : {
281 4200 : aStr.append( *first );
282 4200 : ++first;
283 : }
284 507 : return aStr.makeStringAndClear();
285 : }
286 :
287 3 : void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
288 : {
289 3 : if( m_aObjectStack.empty() )
290 : {
291 3 : PDFFile* pFile = new PDFFile();
292 3 : pFile->m_nMinor = m_aUIntStack.back();
293 3 : m_aUIntStack.pop_back();
294 3 : pFile->m_nMajor = m_aUIntStack.back();
295 3 : m_aUIntStack.pop_back();
296 3 : m_aObjectStack.push_back( pFile );
297 : }
298 : else
299 0 : parseError( "found file header in unusual place", pBegin );
300 3 : }
301 :
302 3 : void pushComment( iteratorT first, iteratorT last )
303 : {
304 : // add a comment to the current stack element
305 : PDFComment* pComment =
306 3 : new PDFComment(iteratorToString(first,last));
307 3 : if( m_aObjectStack.empty() )
308 0 : m_aObjectStack.push_back( new PDFPart() );
309 3 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
310 3 : if( pContainer == NULL )
311 0 : parseError( "comment without container", first );
312 3 : pContainer->m_aSubElements.push_back( pComment );
313 3 : }
314 :
315 1008 : void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
316 : {
317 1008 : PDFContainer* pContainer = NULL;
318 1008 : const char* pMsg = NULL;
319 2016 : if( ! m_aObjectStack.empty() &&
320 1008 : (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
321 : {
322 1296 : if( dynamic_cast<PDFDict*>(pContainer) == NULL &&
323 288 : dynamic_cast<PDFArray*>(pContainer) == NULL )
324 : {
325 78 : PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
326 78 : if( pObj )
327 : {
328 75 : if( pObj->m_pObject == NULL )
329 75 : pObj->m_pObject = pNewValue;
330 : else
331 : {
332 0 : pMsg = "second value for object";
333 0 : pContainer = NULL;
334 : }
335 : }
336 3 : else if( dynamic_cast<PDFDict*>(pNewValue) )
337 : {
338 3 : PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
339 3 : if( pTrailer )
340 : {
341 3 : if( pTrailer->m_pDict == NULL )
342 3 : pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
343 : else
344 0 : pContainer = NULL;
345 : }
346 : else
347 0 : pContainer = NULL;
348 : }
349 : else
350 0 : pContainer = NULL;
351 : }
352 : }
353 1008 : if( pContainer )
354 1008 : pContainer->m_aSubElements.push_back( pNewValue );
355 : else
356 : {
357 0 : if( ! pMsg )
358 : {
359 0 : if( dynamic_cast<PDFContainer*>(pNewValue) )
360 0 : pMsg = "array without container";
361 : else
362 0 : pMsg = "value without container";
363 : }
364 0 : delete pNewValue;
365 0 : parseError( pMsg, pPos );
366 : }
367 1008 : }
368 :
369 486 : void pushName( iteratorT first, iteratorT last )
370 : {
371 486 : insertNewValue( new PDFName(iteratorToString(first,last)), first );
372 486 : }
373 :
374 291 : void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
375 : {
376 291 : insertNewValue( new PDFNumber(m_fDouble), first );
377 291 : }
378 :
379 18 : void pushString( iteratorT first, iteratorT last )
380 : {
381 18 : insertNewValue( new PDFString(iteratorToString(first,last)), first );
382 18 : }
383 :
384 3 : void pushBool( iteratorT first, iteratorT last )
385 : {
386 3 : insertNewValue( new PDFBool( (last-first == 4) ), first );
387 3 : }
388 :
389 6 : void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
390 : {
391 6 : insertNewValue( new PDFNull(), first );
392 6 : }
393 :
394 :
395 75 : void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
396 : {
397 75 : if( m_aObjectStack.empty() )
398 0 : m_aObjectStack.push_back( new PDFPart() );
399 :
400 75 : unsigned int nGeneration = m_aUIntStack.back();
401 75 : m_aUIntStack.pop_back();
402 75 : unsigned int nObject = m_aUIntStack.back();
403 75 : m_aUIntStack.pop_back();
404 :
405 75 : PDFObject* pObj = new PDFObject( nObject, nGeneration );
406 75 : pObj->m_nOffset = first - m_aGlobalBegin;
407 :
408 75 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
409 225 : if( pContainer &&
410 150 : ( dynamic_cast<PDFFile*>(pContainer) ||
411 : dynamic_cast<PDFPart*>(pContainer) ) )
412 : {
413 75 : pContainer->m_aSubElements.push_back( pObj );
414 75 : m_aObjectStack.push_back( pObj );
415 : }
416 : else
417 0 : parseError( "object in wrong place", first );
418 75 : }
419 :
420 75 : void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
421 : {
422 75 : if( m_aObjectStack.empty() )
423 0 : parseError( "endobj without obj", first );
424 75 : else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
425 0 : parseError( "spurious endobj", first );
426 : else
427 75 : m_aObjectStack.pop_back();
428 75 : }
429 :
430 84 : void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
431 : {
432 84 : unsigned int nGeneration = m_aUIntStack.back();
433 84 : m_aUIntStack.pop_back();
434 84 : unsigned int nObject = m_aUIntStack.back();
435 84 : m_aUIntStack.pop_back();
436 84 : insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
437 84 : }
438 :
439 75 : void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
440 : {
441 75 : PDFDict* pDict = new PDFDict();
442 75 : pDict->m_nOffset = first - m_aGlobalBegin;
443 :
444 75 : insertNewValue( pDict, first );
445 : // will not come here if insertion fails (exception)
446 75 : m_aObjectStack.push_back( pDict );
447 75 : }
448 75 : void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
449 : {
450 75 : PDFDict* pDict = NULL;
451 75 : if( m_aObjectStack.empty() )
452 0 : parseError( "dictionary end without begin", first );
453 75 : else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
454 0 : parseError( "spurious dictionary end", first );
455 : else
456 75 : m_aObjectStack.pop_back();
457 :
458 75 : PDFEntry* pOffender = pDict->buildMap();
459 75 : if( pOffender )
460 : {
461 0 : StringEmitContext aCtx;
462 0 : aCtx.write( "offending dictionary element: ", 30 );
463 0 : pOffender->emit( aCtx );
464 0 : m_aErrorString = aCtx.getString();
465 0 : parseError( m_aErrorString.getStr(), first );
466 : }
467 75 : }
468 :
469 45 : void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
470 : {
471 45 : PDFArray* pArray = new PDFArray();
472 45 : pArray->m_nOffset = first - m_aGlobalBegin;
473 :
474 45 : insertNewValue( pArray, first );
475 : // will not come here if insertion fails (exception)
476 45 : m_aObjectStack.push_back( pArray );
477 45 : }
478 :
479 45 : void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
480 : {
481 45 : if( m_aObjectStack.empty() )
482 0 : parseError( "array end without begin", first );
483 45 : else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
484 0 : parseError( "spurious array end", first );
485 : else
486 45 : m_aObjectStack.pop_back();
487 45 : }
488 :
489 24 : void emitStream( iteratorT first, iteratorT last )
490 : {
491 24 : if( m_aObjectStack.empty() )
492 0 : parseError( "stream without object", first );
493 24 : PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
494 24 : if( pObj && pObj->m_pObject )
495 : {
496 24 : if( pObj->m_pStream )
497 0 : parseError( "multiple streams in object", first );
498 :
499 24 : PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
500 24 : if( pDict )
501 : {
502 24 : PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
503 :
504 24 : pObj->m_pStream = pStream;
505 24 : pObj->m_aSubElements.push_back( pStream );
506 24 : }
507 : }
508 : else
509 0 : parseError( "stream without object", first );
510 24 : }
511 :
512 3 : void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
513 : {
514 3 : if( m_aObjectStack.empty() )
515 0 : m_aObjectStack.push_back( new PDFPart() );
516 :
517 3 : PDFTrailer* pTrailer = new PDFTrailer();
518 3 : pTrailer->m_nOffset = first - m_aGlobalBegin;
519 :
520 3 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
521 9 : if( pContainer &&
522 6 : ( dynamic_cast<PDFFile*>(pContainer) ||
523 : dynamic_cast<PDFPart*>(pContainer) ) )
524 : {
525 3 : pContainer->m_aSubElements.push_back( pTrailer );
526 3 : m_aObjectStack.push_back( pTrailer );
527 : }
528 : else
529 0 : parseError( "trailer in wrong place", first );
530 3 : }
531 :
532 3 : void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
533 : {
534 3 : if( m_aObjectStack.empty() )
535 0 : parseError( "%%EOF without trailer", first );
536 3 : else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
537 0 : parseError( "spurious %%EOF", first );
538 : else
539 3 : m_aObjectStack.pop_back();
540 3 : }
541 : };
542 :
543 : #ifdef WIN32
544 : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
545 : {
546 : PDFGrammar<const char*> aGrammar( pBuffer );
547 :
548 : try
549 : {
550 : #if OSL_DEBUG_LEVEL > 1
551 : boost::spirit::parse_info<const char*> aInfo =
552 : #endif
553 : boost::spirit::parse( pBuffer,
554 : pBuffer+nLen,
555 : aGrammar,
556 : boost::spirit::space_p );
557 : #if OSL_DEBUG_LEVEL > 1
558 : SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << (int)aInfo.length );
559 : #endif
560 : }
561 : catch( const parser_error<const char*, const char*>& rError )
562 : {
563 : #if OSL_DEBUG_LEVEL > 1
564 : OString aTmp;
565 : unsigned int nElem = aGrammar.m_aObjectStack.size();
566 : for( unsigned int i = 0; i < nElem; i++ )
567 : aTmp += " " + OString(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
568 :
569 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
570 : #endif
571 : }
572 :
573 : PDFEntry* pRet = NULL;
574 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
575 : if( nEntries == 1 )
576 : {
577 : pRet = aGrammar.m_aObjectStack.back();
578 : aGrammar.m_aObjectStack.pop_back();
579 : }
580 : #if OSL_DEBUG_LEVEL > 1
581 : else if( nEntries > 1 )
582 : SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
583 : #endif
584 :
585 : return pRet;
586 : }
587 : #endif
588 :
589 3 : PDFEntry* PDFReader::read( const char* pFileName )
590 : {
591 : #ifdef WIN32
592 : /* #i106583#
593 : since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
594 : C++ stdlib istream_iterator does not allow "-" apparently
595 : using spirit 2.0 doesn't work in our environment with the MSC
596 :
597 : So for the time being bite the bullet and read the whole file.
598 : FIXME: give Spirit 2.x another try when we upgrade boost again.
599 : */
600 : PDFEntry* pRet = NULL;
601 : FILE* fp = fopen( pFileName, "rb" );
602 : if( fp )
603 : {
604 : fseek( fp, 0, SEEK_END );
605 : unsigned int nLen = (unsigned int)ftell( fp );
606 : fseek( fp, 0, SEEK_SET );
607 : char* pBuf = (char*)rtl_allocateMemory( nLen );
608 : if( pBuf )
609 : {
610 : fread( pBuf, 1, nLen, fp );
611 : pRet = read( pBuf, nLen );
612 : rtl_freeMemory( pBuf );
613 : }
614 : fclose( fp );
615 : }
616 : return pRet;
617 : #else
618 3 : file_iterator<> file_start( pFileName );
619 3 : if( ! file_start )
620 0 : return NULL;
621 6 : file_iterator<> file_end = file_start.make_end();
622 6 : PDFGrammar< file_iterator<> > aGrammar( file_start );
623 :
624 : try
625 : {
626 : #if OSL_DEBUG_LEVEL > 1
627 : boost::spirit::parse_info< file_iterator<> > aInfo =
628 : #endif
629 : boost::spirit::parse( file_start,
630 : file_end,
631 : aGrammar,
632 3 : boost::spirit::space_p );
633 : #if OSL_DEBUG_LEVEL > 1
634 : SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
635 : #endif
636 : }
637 0 : catch( const parser_error< const char*, file_iterator<> >& rError )
638 : {
639 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
640 : #if OSL_DEBUG_LEVEL > 1
641 : OUString aTmp;
642 : unsigned int nElem = aGrammar.m_aObjectStack.size();
643 : for( unsigned int i = 0; i < nElem; i++ )
644 : {
645 : aTmp += " ";
646 : aTmp += OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
647 : strlen(typeid( *(aGrammar.m_aObjectStack[i]) ).name()),
648 : RTL_TEXTENCODING_ASCII_US);
649 : }
650 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp);
651 : #endif
652 : }
653 :
654 3 : PDFEntry* pRet = NULL;
655 3 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
656 3 : if( nEntries == 1 )
657 : {
658 3 : pRet = aGrammar.m_aObjectStack.back();
659 3 : aGrammar.m_aObjectStack.pop_back();
660 : }
661 : #if OSL_DEBUG_LEVEL > 1
662 : else if( nEntries > 1 )
663 : {
664 : SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
665 : for( unsigned int i = 0; i < nEntries; i++ )
666 : {
667 : SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
668 : PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
669 : if( pObj )
670 : SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
671 : else
672 : SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
673 : }
674 : }
675 : #endif
676 6 : return pRet;
677 : #endif // WIN32
678 3 : }
679 :
680 : #if defined _MSC_VER
681 : #pragma warning(pop)
682 : #endif
683 :
684 :
685 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|