Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #if defined _MSC_VER
22 : #pragma warning(push, 1)
23 : #endif
24 :
25 : #include "pdfparse.hxx"
26 :
27 : // workaround windows compiler: do not include multi_pass.hpp
28 : #include <boost/spirit/include/classic_core.hpp>
29 : #include <boost/spirit/include/classic_utility.hpp>
30 : #include <boost/spirit/include/classic_error_handling.hpp>
31 : #include <boost/spirit/include/classic_file_iterator.hpp>
32 : #include <boost/bind.hpp>
33 : #include <string.h>
34 :
35 : #include <rtl/strbuf.hxx>
36 : #include <rtl/alloc.h>
37 :
38 : // disable warnings again because someone along the line has enabled them
39 : #if defined _MSC_VER
40 : #pragma warning(push, 1)
41 : #endif
42 :
43 : using namespace boost::spirit;
44 : using namespace pdfparse;
45 :
46 :
47 : class StringEmitContext : public EmitContext
48 : {
49 : OStringBuffer m_aBuf;
50 : public:
51 0 : StringEmitContext() : EmitContext(), m_aBuf(256) {}
52 0 : virtual ~StringEmitContext() {}
53 0 : virtual bool write( const void* pBuf, unsigned int nLen ) throw() SAL_OVERRIDE
54 : {
55 0 : m_aBuf.append( static_cast<const sal_Char*>(pBuf), nLen );
56 0 : return true;
57 : }
58 0 : virtual unsigned int getCurPos() throw() SAL_OVERRIDE { return m_aBuf.getLength(); }
59 0 : virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() SAL_OVERRIDE
60 0 : { return (nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) ) &&
61 0 : write( m_aBuf.getStr() + nOrigOffset, nLen ); }
62 0 : virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() SAL_OVERRIDE
63 : {
64 0 : if( nOrigOffset+nLen < static_cast<unsigned int>(m_aBuf.getLength()) )
65 : {
66 0 : memcpy( pBuf, m_aBuf.getStr()+nOrigOffset, nLen );
67 0 : return nLen;
68 : }
69 0 : return 0;
70 : }
71 :
72 0 : OString getString() { return m_aBuf.makeStringAndClear(); }
73 : };
74 :
75 : template< class iteratorT >
76 : class PDFGrammar : public grammar< PDFGrammar<iteratorT> >
77 : {
78 : public:
79 :
80 89 : PDFGrammar( const iteratorT& first )
81 89 : : m_fDouble( 0.0 ), m_aGlobalBegin( first ) {}
82 89 : ~PDFGrammar()
83 : {
84 89 : if( !m_aObjectStack.empty() )
85 0 : delete m_aObjectStack.front();
86 89 : }
87 :
88 : double m_fDouble;
89 : std::vector< unsigned int > m_aUIntStack;
90 : std::vector< PDFEntry* > m_aObjectStack;
91 : OString m_aErrorString;
92 : iteratorT m_aGlobalBegin;
93 :
94 : public:
95 : struct pdf_string_parser
96 : {
97 : typedef nil_t result_t;
98 : template <typename ScannerT>
99 : std::ptrdiff_t
100 10 : operator()(ScannerT const& scan, result_t&) const
101 : {
102 10 : std::ptrdiff_t len = 0;
103 :
104 10 : int nBraceLevel = 0;
105 238 : while( ! scan.at_end() )
106 : {
107 228 : char c = *scan;
108 228 : if( c == ')' )
109 : {
110 10 : nBraceLevel--;
111 10 : if( nBraceLevel < 0 )
112 10 : break;
113 : }
114 218 : else if( c == '(' )
115 0 : nBraceLevel++;
116 218 : else if( c == '\\' ) // ignore escaped braces
117 : {
118 0 : ++len;
119 0 : ++scan.first; // tdf#63054: avoid skipping spaces
120 0 : if( scan.first == scan.last ) // tdf#63054: avoid skipping spaces
121 0 : break;
122 : }
123 218 : ++len;
124 218 : ++scan;
125 : }
126 10 : return scan.at_end() ? -1 : len;
127 : }
128 : };
129 :
130 : template< typename ScannerT >
131 89 : struct definition
132 : {
133 89 : definition( const PDFGrammar<iteratorT>& rSelf )
134 89 : {
135 89 : PDFGrammar<iteratorT>* pSelf = const_cast< PDFGrammar<iteratorT>* >( &rSelf );
136 :
137 : // workaround workshop compiler: comment_p doesn't work
138 : // comment = comment_p("%")[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )];
139 89 : comment = lexeme_d[ (ch_p('%') >> *(~ch_p('\r') & ~ch_p('\n')) >> eol_p)[boost::bind(&PDFGrammar::pushComment, pSelf, _1, _2 )] ];
140 :
141 89 : boolean = (str_p("true") | str_p("false"))[boost::bind(&PDFGrammar::pushBool, pSelf, _1, _2)];
142 :
143 : // workaround workshop compiler: confix_p doesn't work
144 : //stream = confix_p( "stream", *anychar_p, "endstream" )[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
145 89 : stream = (str_p("stream") >> *(anychar_p - str_p("endstream")) >> str_p("endstream"))[boost::bind(&PDFGrammar::emitStream, pSelf, _1, _2 )];
146 :
147 89 : name = lexeme_d[
148 : ch_p('/')
149 : >> (*(anychar_p-chset_p("\t\n\f\r ()<>[]{}/%")-ch_p('\0')))
150 89 : [boost::bind(&PDFGrammar::pushName, pSelf, _1, _2)] ];
151 :
152 : // workaround workshop compiler: confix_p doesn't work
153 : //stringtype = ( confix_p("(",*anychar_p, ")") |
154 : // confix_p("<",*xdigit_p, ">") )
155 : // [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
156 :
157 267 : stringtype = ( ( ch_p('(') >> functor_parser<pdf_string_parser>() >> ch_p(')') ) |
158 178 : ( ch_p('<') >> *xdigit_p >> ch_p('>') ) )
159 356 : [boost::bind(&PDFGrammar::pushString,pSelf, _1, _2)];
160 :
161 89 : null_object = str_p( "null" )[boost::bind(&PDFGrammar::pushNull, pSelf, _1, _2)];
162 :
163 : #ifdef USE_ASSIGN_ACTOR
164 : objectref = ( uint_p[push_back_a(pSelf->m_aUIntStack)]
165 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
166 : >> ch_p('R')
167 : >> eps_p
168 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
169 : #else
170 267 : objectref = ( uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
171 356 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
172 356 : >> ch_p('R')
173 178 : >> eps_p
174 356 : )[boost::bind(&PDFGrammar::pushObjectRef, pSelf, _1, _2)];
175 : #endif
176 :
177 : #ifdef USE_ASSIGN_ACTOR
178 : simple_type = objectref | name |
179 : ( real_p[assign_a(pSelf->m_fDouble)] >> eps_p )
180 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
181 : | stringtype | boolean | null_object;
182 : #else
183 267 : simple_type = objectref | name |
184 178 : ( real_p[boost::bind(&PDFGrammar::assign_action_double, pSelf, _1)] >> eps_p )
185 356 : [boost::bind(&PDFGrammar::pushDouble, pSelf, _1, _2)]
186 267 : | stringtype | boolean | null_object;
187 : #endif
188 :
189 89 : dict_begin = str_p( "<<" )[boost::bind(&PDFGrammar::beginDict, pSelf, _1, _2)];
190 89 : dict_end = str_p( ">>" )[boost::bind(&PDFGrammar::endDict, pSelf, _1, _2)];
191 :
192 89 : array_begin = str_p("[")[boost::bind(&PDFGrammar::beginArray,pSelf, _1, _2)];
193 89 : array_end = str_p("]")[boost::bind(&PDFGrammar::endArray,pSelf, _1, _2)];
194 :
195 : #ifdef USE_ASSIGN_ACTOR
196 : object_begin= uint_p[push_back_a(pSelf->m_aUIntStack)]
197 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
198 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
199 : #else
200 267 : object_begin= uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
201 356 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
202 356 : >> str_p("obj" )[boost::bind(&PDFGrammar::beginObject, pSelf, _1, _2)];
203 : #endif
204 89 : object_end = str_p( "endobj" )[boost::bind(&PDFGrammar::endObject, pSelf, _1, _2)];
205 :
206 267 : xref = str_p( "xref" ) >> uint_p >> uint_p
207 356 : >> lexeme_d[
208 178 : +( repeat_p(10)[digit_p]
209 178 : >> blank_p
210 356 : >> repeat_p(5)[digit_p]
211 178 : >> blank_p
212 356 : >> ( ch_p('n') | ch_p('f') )
213 356 : >> repeat_p(2)[space_p]
214 : ) ];
215 :
216 267 : dict_element= dict_begin | comment | simple_type
217 267 : | array_begin | array_end | dict_end;
218 :
219 89 : object = object_begin
220 178 : >> *dict_element
221 356 : >> !stream
222 267 : >> object_end;
223 :
224 267 : trailer = str_p( "trailer" )[boost::bind(&PDFGrammar::beginTrailer,pSelf,_1,_2)]
225 356 : >> *dict_element
226 356 : >> str_p("startxref")
227 178 : >> uint_p
228 356 : >> str_p("%%EOF")[boost::bind(&PDFGrammar::endTrailer,pSelf,_1,_2)];
229 :
230 : #ifdef USE_ASSIGN_ACTOR
231 : pdfrule = ! (lexeme_d[
232 : str_p( "%PDF-" )
233 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
234 : >> ch_p('.')
235 : >> uint_p[push_back_a(pSelf->m_aUIntStack)]
236 : >> *((~ch_p('\r') & ~ch_p('\n')))
237 : >> eol_p
238 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
239 : >> *( comment | object | ( xref >> trailer ) );
240 : #else
241 445 : pdfrule = ! (lexeme_d[
242 : str_p( "%PDF-" )
243 178 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
244 356 : >> ch_p('.')
245 356 : >> uint_p[boost::bind(&PDFGrammar::push_back_action_uint, pSelf, _1)]
246 356 : >> *((~ch_p('\r') & ~ch_p('\n')))
247 178 : >> eol_p
248 356 : ])[boost::bind(&PDFGrammar::haveFile,pSelf, _1, _2)]
249 356 : >> *( comment | object | ( xref >> trailer ) );
250 : #endif
251 89 : }
252 : rule< ScannerT > comment, stream, boolean, name, stringtype, null_object, simple_type,
253 : objectref, array, value, dict_element, dict_begin, dict_end,
254 : array_begin, array_end, object, object_begin, object_end,
255 : xref, trailer, pdfrule;
256 :
257 89 : const rule< ScannerT >& start() const { return pdfrule; }
258 : };
259 :
260 : #ifndef USE_ASSIGN_ACTOR
261 1021 : void push_back_action_uint( unsigned int i )
262 : {
263 1021 : m_aUIntStack.push_back( i );
264 1021 : }
265 407 : void assign_action_double( double d )
266 : {
267 407 : m_fDouble = d;
268 407 : }
269 : #endif
270 :
271 0 : static void parseError( const char* pMessage, iteratorT pLocation )
272 : {
273 0 : throw_( pLocation, pMessage );
274 0 : }
275 :
276 725 : OString iteratorToString( iteratorT first, iteratorT last ) const
277 : {
278 725 : OStringBuffer aStr( 32 );
279 7554 : while( first != last )
280 : {
281 6104 : aStr.append( *first );
282 6104 : ++first;
283 : }
284 725 : return aStr.makeStringAndClear();
285 : }
286 :
287 5 : void haveFile( iteratorT pBegin, SAL_UNUSED_PARAMETER iteratorT /*pEnd*/ )
288 : {
289 5 : if( m_aObjectStack.empty() )
290 : {
291 5 : PDFFile* pFile = new PDFFile();
292 5 : pFile->m_nMinor = m_aUIntStack.back();
293 5 : m_aUIntStack.pop_back();
294 5 : pFile->m_nMajor = m_aUIntStack.back();
295 5 : m_aUIntStack.pop_back();
296 5 : m_aObjectStack.push_back( pFile );
297 : }
298 : else
299 0 : parseError( "found file header in unusual place", pBegin );
300 5 : }
301 :
302 5 : void pushComment( iteratorT first, iteratorT last )
303 : {
304 : // add a comment to the current stack element
305 : PDFComment* pComment =
306 5 : new PDFComment(iteratorToString(first,last));
307 5 : if( m_aObjectStack.empty() )
308 0 : m_aObjectStack.push_back( new PDFPart() );
309 5 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
310 5 : if( pContainer == NULL )
311 0 : parseError( "comment without container", first );
312 5 : pContainer->m_aSubElements.push_back( pComment );
313 5 : }
314 :
315 1434 : void insertNewValue( PDFEntry* pNewValue, iteratorT pPos )
316 : {
317 1434 : PDFContainer* pContainer = NULL;
318 1434 : const char* pMsg = NULL;
319 2868 : if( ! m_aObjectStack.empty() &&
320 1434 : (pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back())) != NULL )
321 : {
322 1852 : if( dynamic_cast<PDFDict*>(pContainer) == NULL &&
323 418 : dynamic_cast<PDFArray*>(pContainer) == NULL )
324 : {
325 112 : PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer);
326 112 : if( pObj )
327 : {
328 107 : if( pObj->m_pObject == NULL )
329 107 : pObj->m_pObject = pNewValue;
330 : else
331 : {
332 0 : pMsg = "second value for object";
333 0 : pContainer = NULL;
334 : }
335 : }
336 5 : else if( dynamic_cast<PDFDict*>(pNewValue) )
337 : {
338 5 : PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pContainer);
339 5 : if( pTrailer )
340 : {
341 5 : if( pTrailer->m_pDict == NULL )
342 5 : pTrailer->m_pDict = dynamic_cast<PDFDict*>(pNewValue);
343 : else
344 0 : pContainer = NULL;
345 : }
346 : else
347 0 : pContainer = NULL;
348 : }
349 : else
350 0 : pContainer = NULL;
351 : }
352 : }
353 1434 : if( pContainer )
354 1434 : pContainer->m_aSubElements.push_back( pNewValue );
355 : else
356 : {
357 0 : if( ! pMsg )
358 : {
359 0 : if( dynamic_cast<PDFContainer*>(pNewValue) )
360 0 : pMsg = "array without container";
361 : else
362 0 : pMsg = "value without container";
363 : }
364 0 : delete pNewValue;
365 0 : parseError( pMsg, pPos );
366 : }
367 1434 : }
368 :
369 690 : void pushName( iteratorT first, iteratorT last )
370 : {
371 690 : insertNewValue( new PDFName(iteratorToString(first,last)), first );
372 690 : }
373 :
374 407 : void pushDouble( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
375 : {
376 407 : insertNewValue( new PDFNumber(m_fDouble), first );
377 407 : }
378 :
379 30 : void pushString( iteratorT first, iteratorT last )
380 : {
381 30 : insertNewValue( new PDFString(iteratorToString(first,last)), first );
382 30 : }
383 :
384 5 : void pushBool( iteratorT first, iteratorT last )
385 : {
386 5 : insertNewValue( new PDFBool( (last-first == 4) ), first );
387 5 : }
388 :
389 10 : void pushNull( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
390 : {
391 10 : insertNewValue( new PDFNull(), first );
392 10 : }
393 :
394 :
395 107 : void beginObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT /*last*/ )
396 : {
397 107 : if( m_aObjectStack.empty() )
398 0 : m_aObjectStack.push_back( new PDFPart() );
399 :
400 107 : unsigned int nGeneration = m_aUIntStack.back();
401 107 : m_aUIntStack.pop_back();
402 107 : unsigned int nObject = m_aUIntStack.back();
403 107 : m_aUIntStack.pop_back();
404 :
405 107 : PDFObject* pObj = new PDFObject( nObject, nGeneration );
406 107 : pObj->m_nOffset = first - m_aGlobalBegin;
407 :
408 107 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
409 321 : if( pContainer &&
410 214 : ( dynamic_cast<PDFFile*>(pContainer) ||
411 : dynamic_cast<PDFPart*>(pContainer) ) )
412 : {
413 107 : pContainer->m_aSubElements.push_back( pObj );
414 107 : m_aObjectStack.push_back( pObj );
415 : }
416 : else
417 0 : parseError( "object in wrong place", first );
418 107 : }
419 :
420 107 : void endObject( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
421 : {
422 107 : if( m_aObjectStack.empty() )
423 0 : parseError( "endobj without obj", first );
424 107 : else if( dynamic_cast<PDFObject*>(m_aObjectStack.back()) == NULL )
425 0 : parseError( "spurious endobj", first );
426 : else
427 107 : m_aObjectStack.pop_back();
428 107 : }
429 :
430 122 : void pushObjectRef( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
431 : {
432 122 : unsigned int nGeneration = m_aUIntStack.back();
433 122 : m_aUIntStack.pop_back();
434 122 : unsigned int nObject = m_aUIntStack.back();
435 122 : m_aUIntStack.pop_back();
436 122 : insertNewValue( new PDFObjectRef(nObject,nGeneration), first );
437 122 : }
438 :
439 107 : void beginDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
440 : {
441 107 : PDFDict* pDict = new PDFDict();
442 107 : pDict->m_nOffset = first - m_aGlobalBegin;
443 :
444 107 : insertNewValue( pDict, first );
445 : // will not come here if insertion fails (exception)
446 107 : m_aObjectStack.push_back( pDict );
447 107 : }
448 107 : void endDict( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
449 : {
450 107 : PDFDict* pDict = NULL;
451 107 : if( m_aObjectStack.empty() )
452 0 : parseError( "dictionary end without begin", first );
453 107 : else if( (pDict = dynamic_cast<PDFDict*>(m_aObjectStack.back())) == NULL )
454 0 : parseError( "spurious dictionary end", first );
455 : else
456 107 : m_aObjectStack.pop_back();
457 :
458 107 : PDFEntry* pOffender = pDict->buildMap();
459 107 : if( pOffender )
460 : {
461 0 : StringEmitContext aCtx;
462 0 : aCtx.write( "offending dictionary element: ", 30 );
463 0 : pOffender->emit( aCtx );
464 0 : m_aErrorString = aCtx.getString();
465 0 : parseError( m_aErrorString.getStr(), first );
466 : }
467 107 : }
468 :
469 63 : void beginArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
470 : {
471 63 : PDFArray* pArray = new PDFArray();
472 63 : pArray->m_nOffset = first - m_aGlobalBegin;
473 :
474 63 : insertNewValue( pArray, first );
475 : // will not come here if insertion fails (exception)
476 63 : m_aObjectStack.push_back( pArray );
477 63 : }
478 :
479 63 : void endArray( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
480 : {
481 63 : if( m_aObjectStack.empty() )
482 0 : parseError( "array end without begin", first );
483 63 : else if( dynamic_cast<PDFArray*>(m_aObjectStack.back()) == NULL )
484 0 : parseError( "spurious array end", first );
485 : else
486 63 : m_aObjectStack.pop_back();
487 63 : }
488 :
489 34 : void emitStream( iteratorT first, iteratorT last )
490 : {
491 34 : if( m_aObjectStack.empty() )
492 0 : parseError( "stream without object", first );
493 34 : PDFObject* pObj = dynamic_cast<PDFObject*>(m_aObjectStack.back());
494 34 : if( pObj && pObj->m_pObject )
495 : {
496 34 : if( pObj->m_pStream )
497 0 : parseError( "multiple streams in object", first );
498 :
499 34 : PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
500 34 : if( pDict )
501 : {
502 34 : PDFStream* pStream = new PDFStream( first - m_aGlobalBegin, last - m_aGlobalBegin, pDict );
503 :
504 34 : pObj->m_pStream = pStream;
505 34 : pObj->m_aSubElements.push_back( pStream );
506 34 : }
507 : }
508 : else
509 0 : parseError( "stream without object", first );
510 34 : }
511 :
512 5 : void beginTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
513 : {
514 5 : if( m_aObjectStack.empty() )
515 0 : m_aObjectStack.push_back( new PDFPart() );
516 :
517 5 : PDFTrailer* pTrailer = new PDFTrailer();
518 5 : pTrailer->m_nOffset = first - m_aGlobalBegin;
519 :
520 5 : PDFContainer* pContainer = dynamic_cast<PDFContainer*>(m_aObjectStack.back());
521 15 : if( pContainer &&
522 10 : ( dynamic_cast<PDFFile*>(pContainer) ||
523 : dynamic_cast<PDFPart*>(pContainer) ) )
524 : {
525 5 : pContainer->m_aSubElements.push_back( pTrailer );
526 5 : m_aObjectStack.push_back( pTrailer );
527 : }
528 : else
529 0 : parseError( "trailer in wrong place", first );
530 5 : }
531 :
532 5 : void endTrailer( iteratorT first, SAL_UNUSED_PARAMETER iteratorT )
533 : {
534 5 : if( m_aObjectStack.empty() )
535 0 : parseError( "%%EOF without trailer", first );
536 5 : else if( dynamic_cast<PDFTrailer*>(m_aObjectStack.back()) == NULL )
537 0 : parseError( "spurious %%EOF", first );
538 : else
539 5 : m_aObjectStack.pop_back();
540 5 : }
541 : };
542 :
543 : #ifdef WIN32
544 : PDFEntry* PDFReader::read( const char* pBuffer, unsigned int nLen )
545 : {
546 : PDFGrammar<const char*> aGrammar( pBuffer );
547 :
548 : try
549 : {
550 : #if OSL_DEBUG_LEVEL > 1
551 : boost::spirit::parse_info<const char*> aInfo =
552 : #endif
553 : boost::spirit::parse( pBuffer,
554 : pBuffer+nLen,
555 : aGrammar,
556 : boost::spirit::space_p );
557 : #if OSL_DEBUG_LEVEL > 1
558 : SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop = " << aInfo.stop << " (buff=" << pBuffer << ", offset = " << aInfo.stop - pBuffer << "), hit = " << (aInfo.hit ? OUString("true") : OUString("false")) << ", full = " << (aInfo.full ? OUString("true") : OUString("false")) << ", length = " << (int)aInfo.length );
559 : #endif
560 : }
561 : catch( const parser_error<const char*, const char*>& rError )
562 : {
563 : #if OSL_DEBUG_LEVEL > 1
564 : OString aTmp;
565 : unsigned int nElem = aGrammar.m_aObjectStack.size();
566 : for( unsigned int i = 0; i < nElem; i++ )
567 : aTmp += " " + OString(typeid( *(aGrammar.m_aObjectStack[i]) ).name());
568 :
569 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - pBuffer << ", object stack: " << aTmp);
570 : #endif
571 : }
572 :
573 : PDFEntry* pRet = NULL;
574 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
575 : if( nEntries == 1 )
576 : {
577 : pRet = aGrammar.m_aObjectStack.back();
578 : aGrammar.m_aObjectStack.pop_back();
579 : }
580 : #if OSL_DEBUG_LEVEL > 1
581 : else if( nEntries > 1 )
582 : SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse" );
583 : #endif
584 :
585 : return pRet;
586 : }
587 : #endif
588 :
589 89 : PDFEntry* PDFReader::read( const char* pFileName )
590 : {
591 : #ifdef WIN32
592 : /* #i106583#
593 : since converting to boost 1.39 file_iterator does not work anymore on all Windows systems
594 : C++ stdlib istream_iterator does not allow "-" apparently
595 : using spirit 2.0 doesn't work in our environment with the MSC
596 :
597 : So for the time being bite the bullet and read the whole file.
598 : FIXME: give Spirit 2.x another try when we upgrade boost again.
599 : */
600 : PDFEntry* pRet = NULL;
601 : FILE* fp = fopen( pFileName, "rb" );
602 : if( fp )
603 : {
604 : fseek( fp, 0, SEEK_END );
605 : unsigned int nLen = (unsigned int)ftell( fp );
606 : fseek( fp, 0, SEEK_SET );
607 : char* pBuf = (char*)rtl_allocateMemory( nLen );
608 : if( pBuf )
609 : {
610 : fread( pBuf, 1, nLen, fp );
611 : pRet = read( pBuf, nLen );
612 : rtl_freeMemory( pBuf );
613 : }
614 : fclose( fp );
615 : }
616 : return pRet;
617 : #else
618 89 : file_iterator<> file_start( pFileName );
619 89 : if( ! file_start )
620 0 : return NULL;
621 178 : file_iterator<> file_end = file_start.make_end();
622 178 : PDFGrammar< file_iterator<> > aGrammar( file_start );
623 :
624 : try
625 : {
626 : #if OSL_DEBUG_LEVEL > 1
627 : boost::spirit::parse_info< file_iterator<> > aInfo =
628 : #endif
629 : boost::spirit::parse( file_start,
630 : file_end,
631 : aGrammar,
632 89 : boost::spirit::space_p );
633 : #if OSL_DEBUG_LEVEL > 1
634 : SAL_INFO("sdext.pdfimport.pdfparse", "parseinfo: stop at offset = " << aInfo.stop - file_start << ", hit = " << (aInfo.hit ? "true" : "false") << ", full = " << (aInfo.full ? "true" : "false") << ", length = " << aInfo.length);
635 : #endif
636 : }
637 0 : catch( const parser_error< const char*, file_iterator<> >& rError )
638 : {
639 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error: " << rError.descriptor << " at buffer pos " << rError.where - file_start);
640 : #if OSL_DEBUG_LEVEL > 1
641 : OUString aTmp;
642 : unsigned int nElem = aGrammar.m_aObjectStack.size();
643 : for( unsigned int i = 0; i < nElem; i++ )
644 : {
645 : aTmp += " ";
646 : aTmp += OUString(typeid( *(aGrammar.m_aObjectStack[i]) ).name(),
647 : strlen(typeid( *(aGrammar.m_aObjectStack[i]) ).name()),
648 : RTL_TEXTENCODING_ASCII_US);
649 : }
650 : SAL_WARN("sdext.pdfimport.pdfparse", "parse error object stack: " << aTmp);
651 : #endif
652 : }
653 :
654 89 : PDFEntry* pRet = NULL;
655 89 : unsigned int nEntries = aGrammar.m_aObjectStack.size();
656 89 : if( nEntries == 1 )
657 : {
658 5 : pRet = aGrammar.m_aObjectStack.back();
659 5 : aGrammar.m_aObjectStack.pop_back();
660 : }
661 : #if OSL_DEBUG_LEVEL > 1
662 : else if( nEntries > 1 )
663 : {
664 : SAL_WARN("sdext.pdfimport.pdfparse", "error got " << nEntries << " stack objects in parse");
665 : for( unsigned int i = 0; i < nEntries; i++ )
666 : {
667 : SAL_WARN("sdext.pdfimport.pdfparse", typeid(*aGrammar.m_aObjectStack[i]).name());
668 : PDFObject* pObj = dynamic_cast<PDFObject*>(aGrammar.m_aObjectStack[i]);
669 : if( pObj )
670 : SAL_WARN("sdext.pdfimport.pdfparse", " -> object " << pObj->m_nNumber << " generation " << pObj->m_nGeneration);
671 : else
672 : SAL_WARN("sdext.pdfimport.pdfparse", "(type " << typeid(*aGrammar.m_aObjectStack[i]).name() << ")");
673 : }
674 : }
675 : #endif
676 178 : return pRet;
677 : #endif // WIN32
678 15 : }
679 :
680 : #if defined _MSC_VER
681 : #pragma warning(pop)
682 : #endif
683 :
684 :
685 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|