Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #include <pdfparse.hxx>
22 :
23 : #include <rtl/strbuf.hxx>
24 : #include <rtl/ustring.hxx>
25 : #include <rtl/ustrbuf.hxx>
26 : #include <rtl/alloc.h>
27 : #include <rtl/digest.h>
28 : #include <rtl/cipher.h>
29 :
30 : #include <zlib.h>
31 :
32 : #include <math.h>
33 : #include <map>
34 :
35 : #include <stdio.h>
36 : #include <string.h>
37 :
38 :
39 :
40 : namespace pdfparse
41 : {
42 :
43 : struct EmitImplData
44 : {
45 : // xref table: maps object number to a pair of (generation, buffer offset)
46 : typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
47 : XRefTable m_aXRefTable;
48 : // container of all indirect objects (usually a PDFFile*)
49 : const PDFContainer* m_pObjectContainer;
50 : unsigned int m_nDecryptObject;
51 : unsigned int m_nDecryptGeneration;
52 :
53 : // returns true if the xref table was updated
54 0 : bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
55 : {
56 0 : XRefTable::iterator it = m_aXRefTable.find( nObject );
57 0 : if( it == m_aXRefTable.end() )
58 : {
59 : // new entry
60 0 : m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
61 0 : return true;
62 : }
63 : // update old entry, if generation number is higher
64 0 : if( it->second.first < nGeneration )
65 : {
66 0 : it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
67 0 : return true;
68 : }
69 0 : return false;
70 : }
71 :
72 0 : EmitImplData( const PDFContainer* pTopContainer ) :
73 : m_pObjectContainer( pTopContainer ),
74 : m_nDecryptObject( 0 ),
75 0 : m_nDecryptGeneration( 0 )
76 0 : {}
77 0 : ~EmitImplData() {}
78 0 : bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
79 : unsigned int nObject, unsigned int nGeneration ) const
80 : {
81 0 : const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
82 0 : return pFile ? pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration ) : false;
83 : }
84 :
85 0 : void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
86 : {
87 0 : m_nDecryptObject = nObject;
88 0 : m_nDecryptGeneration = nGeneration;
89 0 : }
90 : };
91 :
92 : }
93 :
94 : using namespace pdfparse;
95 :
96 0 : EmitContext::EmitContext( const PDFContainer* pTop ) :
97 : m_bDeflate( false ),
98 : m_bDecrypt( false ),
99 0 : m_pImplData( NULL )
100 : {
101 0 : if( pTop )
102 0 : m_pImplData = new EmitImplData( pTop );
103 0 : }
104 :
105 0 : EmitContext::~EmitContext()
106 : {
107 0 : delete m_pImplData;
108 0 : }
109 :
110 1116 : PDFEntry::~PDFEntry()
111 : {
112 1116 : }
113 :
114 0 : EmitImplData* PDFEntry::getEmitData( EmitContext& rContext ) const
115 : {
116 0 : return rContext.m_pImplData;
117 : }
118 :
119 0 : void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData ) const
120 : {
121 0 : if( rContext.m_pImplData && rContext.m_pImplData != pNewEmitData )
122 0 : delete rContext.m_pImplData;
123 0 : rContext.m_pImplData = pNewEmitData;
124 0 : }
125 :
126 888 : PDFValue::~PDFValue()
127 : {
128 888 : }
129 :
130 6 : PDFComment::~PDFComment()
131 : {
132 6 : }
133 :
134 0 : bool PDFComment::emit( EmitContext& rWriteContext ) const
135 : {
136 0 : return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
137 : }
138 :
139 0 : PDFEntry* PDFComment::clone() const
140 : {
141 0 : return new PDFComment( m_aComment );
142 : }
143 :
144 972 : PDFName::~PDFName()
145 : {
146 972 : }
147 :
148 0 : bool PDFName::emit( EmitContext& rWriteContext ) const
149 : {
150 0 : if( ! rWriteContext.write( " /", 2 ) )
151 0 : return false;
152 0 : return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
153 : }
154 :
155 0 : PDFEntry* PDFName::clone() const
156 : {
157 0 : return new PDFName( m_aName );
158 : }
159 :
160 0 : OUString PDFName::getFilteredName() const
161 : {
162 0 : OStringBuffer aFilter( m_aName.getLength() );
163 0 : const sal_Char* pStr = m_aName.getStr();
164 0 : unsigned int nLen = m_aName.getLength();
165 0 : for( unsigned int i = 0; i < nLen; i++ )
166 : {
167 0 : if( (i < nLen - 3) && pStr[i] == '#' )
168 : {
169 0 : sal_Char rResult = 0;
170 0 : i++;
171 0 : if( pStr[i] >= '0' && pStr[i] <= '9' )
172 0 : rResult = sal_Char( pStr[i]-'0' ) << 4;
173 0 : else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
174 0 : rResult = sal_Char( pStr[i]-'a' + 10 ) << 4;
175 0 : else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
176 0 : rResult = sal_Char( pStr[i]-'A' + 10 ) << 4;
177 0 : i++;
178 0 : if( pStr[i] >= '0' && pStr[i] <= '9' )
179 0 : rResult |= sal_Char( pStr[i]-'0' );
180 0 : else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
181 0 : rResult |= sal_Char( pStr[i]-'a' + 10 );
182 0 : else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
183 0 : rResult |= sal_Char( pStr[i]-'A' + 10 );
184 0 : aFilter.append( rResult );
185 : }
186 : else
187 0 : aFilter.append( pStr[i] );
188 : }
189 0 : return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
190 : }
191 :
192 36 : PDFString::~PDFString()
193 : {
194 36 : }
195 :
196 0 : bool PDFString::emit( EmitContext& rWriteContext ) const
197 : {
198 0 : if( ! rWriteContext.write( " ", 1 ) )
199 0 : return false;
200 0 : EmitImplData* pEData = getEmitData( rWriteContext );
201 0 : if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
202 : {
203 0 : OString aFiltered( getFilteredString() );
204 : // decrypt inplace (evil since OString is supposed to be const
205 : // however in this case we know that getFilteredString returned a singular string instance
206 0 : pEData->decrypt( (sal_uInt8*)aFiltered.getStr(), aFiltered.getLength(),
207 0 : (sal_uInt8*)aFiltered.getStr(),
208 0 : pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
209 : // check for string or hex string
210 0 : const sal_Char* pStr = aFiltered.getStr();
211 0 : if( aFiltered.getLength() > 1 &&
212 0 : ( ((unsigned char)pStr[0] == 0xff && (unsigned char)pStr[1] == 0xfe) ||
213 0 : ((unsigned char)pStr[0] == 0xfe && (unsigned char)pStr[1] == 0xff) ) )
214 : {
215 : static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
216 : '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
217 0 : if( ! rWriteContext.write( "<", 1 ) )
218 0 : return false;
219 0 : for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
220 : {
221 0 : if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
222 0 : return false;
223 0 : if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
224 0 : return false;
225 : }
226 0 : if( ! rWriteContext.write( ">", 1 ) )
227 0 : return false;
228 : }
229 : else
230 : {
231 0 : if( ! rWriteContext.write( "(", 1 ) )
232 0 : return false;
233 0 : if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
234 0 : return false;
235 0 : if( ! rWriteContext.write( ")", 1 ) )
236 0 : return false;
237 : }
238 0 : return true;
239 : }
240 0 : return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
241 : }
242 :
243 0 : PDFEntry* PDFString::clone() const
244 : {
245 0 : return new PDFString( m_aString );
246 : }
247 :
248 3 : OString PDFString::getFilteredString() const
249 : {
250 3 : int nLen = m_aString.getLength();
251 3 : OStringBuffer aBuf( nLen );
252 :
253 3 : const sal_Char* pStr = m_aString.getStr();
254 3 : if( *pStr == '(' )
255 : {
256 0 : const sal_Char* pRun = pStr+1;
257 0 : while( pRun - pStr < nLen-1 )
258 : {
259 0 : if( *pRun == '\\' )
260 : {
261 0 : pRun++;
262 0 : if( pRun - pStr < nLen )
263 : {
264 0 : sal_Char aEsc = 0;
265 0 : if( *pRun == 'n' )
266 0 : aEsc = '\n';
267 0 : else if( *pRun == 'r' )
268 0 : aEsc = '\r';
269 0 : else if( *pRun == 't' )
270 0 : aEsc = '\t';
271 0 : else if( *pRun == 'b' )
272 0 : aEsc = '\b';
273 0 : else if( *pRun == 'f' )
274 0 : aEsc = '\f';
275 0 : else if( *pRun == '(' )
276 0 : aEsc = '(';
277 0 : else if( *pRun == ')' )
278 0 : aEsc = ')';
279 0 : else if( *pRun == '\\' )
280 0 : aEsc = '\\';
281 0 : else if( *pRun == '\n' )
282 : {
283 0 : pRun++;
284 0 : continue;
285 : }
286 0 : else if( *pRun == '\r' )
287 : {
288 0 : pRun++;
289 0 : if( *pRun == '\n' )
290 0 : pRun++;
291 0 : continue;
292 : }
293 : else
294 : {
295 0 : int i = 0;
296 0 : while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
297 0 : aEsc = 8*aEsc + (*pRun++ - '0');
298 : // move pointer back to last character of octal sequence
299 0 : pRun--;
300 : }
301 0 : aBuf.append( aEsc );
302 : }
303 : }
304 : else
305 0 : aBuf.append( *pRun );
306 : // move pointer to next character
307 0 : pRun++;
308 : }
309 : }
310 3 : else if( *pStr == '<' )
311 : {
312 3 : const sal_Char* pRun = pStr+1;
313 54 : while( *pRun != '>' && pRun - pStr < nLen )
314 : {
315 48 : sal_Char rResult = 0;
316 48 : if( *pRun >= '0' && *pRun <= '9' )
317 33 : rResult = sal_Char( *pRun-'0' ) << 4;
318 15 : else if( *pRun >= 'a' && *pRun <= 'f' )
319 0 : rResult = sal_Char( *pRun-'a' + 10 ) << 4;
320 15 : else if( *pRun >= 'A' && *pRun <= 'F' )
321 15 : rResult = sal_Char( *pRun-'A' + 10 ) << 4;
322 48 : pRun++;
323 48 : if( *pRun != '>' && pRun - pStr < nLen )
324 : {
325 48 : if( *pRun >= '0' && *pRun <= '9' )
326 36 : rResult |= sal_Char( *pRun-'0' );
327 12 : else if( *pRun >= 'a' && *pRun <= 'f' )
328 0 : rResult |= sal_Char( *pRun-'a' + 10 );
329 12 : else if( *pRun >= 'A' && *pRun <= 'F' )
330 12 : rResult |= sal_Char( *pRun-'A' + 10 );
331 : }
332 48 : pRun++;
333 48 : aBuf.append( rResult );
334 : }
335 : }
336 :
337 3 : return aBuf.makeStringAndClear();
338 : }
339 :
340 582 : PDFNumber::~PDFNumber()
341 : {
342 582 : }
343 :
344 0 : bool PDFNumber::emit( EmitContext& rWriteContext ) const
345 : {
346 0 : OStringBuffer aBuf( 32 );
347 0 : aBuf.append( ' ' );
348 :
349 0 : double fValue = m_fValue;
350 0 : bool bNeg = false;
351 0 : int nPrecision = 5;
352 0 : if( fValue < 0.0 )
353 : {
354 0 : bNeg = true;
355 0 : fValue=-fValue;
356 : }
357 :
358 0 : sal_Int64 nInt = (sal_Int64)fValue;
359 0 : fValue -= (double)nInt;
360 : // optimizing hardware may lead to a value of 1.0 after the subtraction
361 0 : if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
362 : {
363 0 : nInt++;
364 0 : fValue = 0.0;
365 : }
366 0 : sal_Int64 nFrac = 0;
367 0 : if( fValue )
368 : {
369 0 : fValue *= pow( 10.0, (double)nPrecision );
370 0 : nFrac = (sal_Int64)fValue;
371 : }
372 0 : if( bNeg && ( nInt || nFrac ) )
373 0 : aBuf.append( '-' );
374 0 : aBuf.append( nInt );
375 0 : if( nFrac )
376 : {
377 : int i;
378 0 : aBuf.append( '.' );
379 0 : sal_Int64 nBound = (sal_Int64)(pow( 10.0, nPrecision - 1.0 )+0.5);
380 0 : for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
381 : {
382 0 : sal_Int64 nNumb = nFrac / nBound;
383 0 : nFrac -= nNumb * nBound;
384 0 : aBuf.append( nNumb );
385 0 : nBound /= 10;
386 : }
387 : }
388 :
389 0 : return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
390 : }
391 :
392 0 : PDFEntry* PDFNumber::clone() const
393 : {
394 0 : return new PDFNumber( m_fValue );
395 : }
396 :
397 :
398 6 : PDFBool::~PDFBool()
399 : {
400 6 : }
401 :
402 0 : bool PDFBool::emit( EmitContext& rWriteContext ) const
403 : {
404 0 : return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
405 : }
406 :
407 0 : PDFEntry* PDFBool::clone() const
408 : {
409 0 : return new PDFBool( m_bValue );
410 : }
411 :
412 12 : PDFNull::~PDFNull()
413 : {
414 12 : }
415 :
416 0 : bool PDFNull::emit( EmitContext& rWriteContext ) const
417 : {
418 0 : return rWriteContext.write( " null", 5 );
419 : }
420 :
421 0 : PDFEntry* PDFNull::clone() const
422 : {
423 0 : return new PDFNull();
424 : }
425 :
426 :
427 168 : PDFObjectRef::~PDFObjectRef()
428 : {
429 168 : }
430 :
431 0 : bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
432 : {
433 0 : OStringBuffer aBuf( 16 );
434 0 : aBuf.append( ' ' );
435 0 : aBuf.append( sal_Int32( m_nNumber ) );
436 0 : aBuf.append( ' ' );
437 0 : aBuf.append( sal_Int32( m_nGeneration ) );
438 0 : aBuf.append( " R", 2 );
439 0 : return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
440 : }
441 :
442 0 : PDFEntry* PDFObjectRef::clone() const
443 : {
444 0 : return new PDFObjectRef( m_nNumber, m_nGeneration );
445 : }
446 :
447 402 : PDFContainer::~PDFContainer()
448 : {
449 201 : int nEle = m_aSubElements.size();
450 1314 : for( int i = 0; i < nEle; i++ )
451 1113 : delete m_aSubElements[i];
452 201 : }
453 :
454 0 : bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
455 : {
456 0 : int nEle = m_aSubElements.size();
457 0 : for( int i = 0; i < nEle; i++ )
458 : {
459 0 : if( rWriteContext.m_bDecrypt )
460 : {
461 0 : const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i]);
462 0 : if (pName && pName->m_aName.equalsL(RTL_CONSTASCII_STRINGPARAM("Encrypt")))
463 : {
464 0 : i++;
465 0 : continue;
466 : }
467 : }
468 0 : if( ! m_aSubElements[i]->emit( rWriteContext ) )
469 0 : return false;
470 : }
471 0 : return true;
472 : }
473 :
474 0 : void PDFContainer::cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const
475 : {
476 0 : int nEle = m_aSubElements.size();
477 0 : for( int i = 0; i < nEle; i++ )
478 0 : rNewSubElements.push_back( m_aSubElements[i]->clone() );
479 0 : }
480 :
481 0 : PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
482 : {
483 0 : unsigned int nEle = m_aSubElements.size();
484 0 : for( unsigned int i = 0; i < nEle; i++ )
485 : {
486 0 : PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i]);
487 0 : if( pObject &&
488 0 : pObject->m_nNumber == nNumber &&
489 0 : pObject->m_nGeneration == nGeneration )
490 : {
491 0 : return pObject;
492 : }
493 : }
494 0 : return NULL;
495 : }
496 :
497 90 : PDFArray::~PDFArray()
498 : {
499 90 : }
500 :
501 0 : bool PDFArray::emit( EmitContext& rWriteContext ) const
502 : {
503 0 : if( ! rWriteContext.write( "[", 1 ) )
504 0 : return false;
505 0 : if( ! emitSubElements( rWriteContext ) )
506 0 : return false;
507 0 : return rWriteContext.write( "]", 1 );
508 : }
509 :
510 0 : PDFEntry* PDFArray::clone() const
511 : {
512 0 : PDFArray* pNewAr = new PDFArray();
513 0 : cloneSubElements( pNewAr->m_aSubElements );
514 0 : return pNewAr;
515 : }
516 :
517 150 : PDFDict::~PDFDict()
518 : {
519 150 : }
520 :
521 0 : bool PDFDict::emit( EmitContext& rWriteContext ) const
522 : {
523 0 : if( ! rWriteContext.write( "<<\n", 3 ) )
524 0 : return false;
525 0 : if( ! emitSubElements( rWriteContext ) )
526 0 : return false;
527 0 : return rWriteContext.write( "\n>>\n", 4 );
528 : }
529 :
530 0 : void PDFDict::insertValue( const OString& rName, PDFEntry* pValue )
531 : {
532 0 : if( ! pValue )
533 0 : eraseValue( rName );
534 :
535 0 : boost::unordered_map<OString,PDFEntry*,OStringHash>::iterator it = m_aMap.find( rName );
536 0 : if( it == m_aMap.end() )
537 : {
538 : // new name/value, pair, append it
539 0 : m_aSubElements.push_back( new PDFName( rName ) );
540 0 : m_aSubElements.push_back( pValue );
541 : }
542 : else
543 : {
544 0 : unsigned int nSub = m_aSubElements.size();
545 0 : for( unsigned int i = 0; i < nSub; i++ )
546 0 : if( m_aSubElements[i] == it->second )
547 0 : m_aSubElements[i] = pValue;
548 0 : delete it->second;
549 : }
550 0 : m_aMap[ rName ] = pValue;
551 0 : }
552 :
553 0 : void PDFDict::eraseValue( const OString& rName )
554 : {
555 0 : unsigned int nEle = m_aSubElements.size();
556 0 : for( unsigned int i = 0; i < nEle; i++ )
557 : {
558 0 : PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i]);
559 0 : if( pName && pName->m_aName.equals( rName ) )
560 : {
561 0 : for( unsigned int j = i+1; j < nEle; j++ )
562 : {
563 0 : if( dynamic_cast<PDFComment*>(m_aSubElements[j]) == NULL )
564 : {
565 : // free name and value
566 0 : delete m_aSubElements[j];
567 0 : delete m_aSubElements[i];
568 : // remove subelements from vector
569 0 : m_aSubElements.erase( m_aSubElements.begin()+j );
570 0 : m_aSubElements.erase( m_aSubElements.begin()+i );
571 0 : buildMap();
572 0 : return;
573 : }
574 : }
575 : }
576 : }
577 : }
578 :
579 75 : PDFEntry* PDFDict::buildMap()
580 : {
581 : // clear map
582 75 : m_aMap.clear();
583 : // build map
584 75 : unsigned int nEle = m_aSubElements.size();
585 75 : PDFName* pName = NULL;
586 795 : for( unsigned int i = 0; i < nEle; i++ )
587 : {
588 720 : if( dynamic_cast<PDFComment*>(m_aSubElements[i]) == NULL )
589 : {
590 720 : if( pName )
591 : {
592 360 : m_aMap[ pName->m_aName ] = m_aSubElements[i];
593 360 : pName = NULL;
594 : }
595 360 : else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i])) == NULL )
596 0 : return m_aSubElements[i];
597 : }
598 : }
599 75 : return pName;
600 : }
601 :
602 0 : PDFEntry* PDFDict::clone() const
603 : {
604 0 : PDFDict* pNewDict = new PDFDict();
605 0 : cloneSubElements( pNewDict->m_aSubElements );
606 0 : pNewDict->buildMap();
607 0 : return pNewDict;
608 : }
609 :
610 48 : PDFStream::~PDFStream()
611 : {
612 48 : }
613 :
614 0 : bool PDFStream::emit( EmitContext& rWriteContext ) const
615 : {
616 0 : return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
617 : }
618 :
619 0 : PDFEntry* PDFStream::clone() const
620 : {
621 0 : return new PDFStream( m_nBeginOffset, m_nEndOffset, NULL );
622 : }
623 :
624 0 : unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
625 : {
626 0 : if( ! m_pDict )
627 0 : return 0;
628 : // find /Length entry, can either be a direct or indirect number object
629 : boost::unordered_map<OString,PDFEntry*,OStringHash>::const_iterator it =
630 0 : m_pDict->m_aMap.find( "Length" );
631 0 : if( it == m_pDict->m_aMap.end() )
632 0 : return 0;
633 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
634 0 : if( ! pNum && pContainer )
635 : {
636 0 : PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
637 0 : if( pRef )
638 : {
639 0 : int nEle = pContainer->m_aSubElements.size();
640 0 : for( int i = 0; i < nEle && ! pNum; i++ )
641 : {
642 0 : PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i]);
643 0 : if( pObj &&
644 0 : pObj->m_nNumber == pRef->m_nNumber &&
645 0 : pObj->m_nGeneration == pRef->m_nGeneration )
646 : {
647 0 : if( pObj->m_pObject )
648 0 : pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
649 0 : break;
650 : }
651 : }
652 : }
653 : }
654 0 : return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
655 : }
656 :
657 150 : PDFObject::~PDFObject()
658 : {
659 150 : }
660 :
661 0 : bool PDFObject::getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
662 : {
663 0 : bool bIsDeflated = false;
664 0 : if( m_pStream && m_pStream->m_pDict &&
665 0 : m_pStream->m_nEndOffset > m_pStream->m_nBeginOffset+15
666 : )
667 : {
668 0 : unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
669 0 : *ppStream = static_cast<char*>(rtl_allocateMemory( nOuterStreamLen ));
670 0 : if( ! ppStream )
671 : {
672 0 : *pBytes = 0;
673 0 : return false;
674 : }
675 0 : unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, *ppStream );
676 0 : if( nRead != nOuterStreamLen )
677 : {
678 0 : rtl_freeMemory( *ppStream );
679 0 : *ppStream = NULL;
680 0 : *pBytes = 0;
681 0 : return false;
682 : }
683 : // is there a filter entry ?
684 : boost::unordered_map<OString,PDFEntry*,OStringHash>::const_iterator it =
685 0 : m_pStream->m_pDict->m_aMap.find( "Filter" );
686 0 : if( it != m_pStream->m_pDict->m_aMap.end() )
687 : {
688 0 : PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
689 0 : if( ! pFilter )
690 : {
691 0 : PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
692 0 : if( pArray && ! pArray->m_aSubElements.empty() )
693 : {
694 0 : pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front());
695 : }
696 : }
697 :
698 : // is the (first) filter FlateDecode ?
699 0 : if (pFilter && pFilter->m_aName.equalsL(RTL_CONSTASCII_STRINGPARAM("FlateDecode")))
700 : {
701 0 : bIsDeflated = true;
702 : }
703 : }
704 : // prepare compressed data section
705 0 : char* pStream = *ppStream;
706 0 : if( pStream[0] == 's' )
707 0 : pStream += 6; // skip "stream"
708 : // skip line end after "stream"
709 0 : while( *pStream == '\r' || *pStream == '\n' )
710 0 : pStream++;
711 : // get the compressed length
712 0 : *pBytes = m_pStream->getDictLength( pObjectContainer );
713 0 : if( pStream != *ppStream )
714 0 : memmove( *ppStream, pStream, *pBytes );
715 0 : if( rContext.m_bDecrypt )
716 : {
717 0 : EmitImplData* pEData = getEmitData( rContext );
718 : pEData->decrypt( reinterpret_cast<const sal_uInt8*>(*ppStream),
719 : *pBytes,
720 : reinterpret_cast<sal_uInt8*>(*ppStream),
721 : m_nNumber,
722 : m_nGeneration
723 0 : ); // decrypt inplace
724 0 : }
725 : }
726 : else
727 0 : *ppStream = NULL, *pBytes = 0;
728 0 : return bIsDeflated;
729 : }
730 :
731 0 : static void unzipToBuffer( const char* pBegin, unsigned int nLen,
732 : sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
733 : {
734 : z_stream aZStr;
735 0 : aZStr.next_in = (Bytef*)pBegin;
736 0 : aZStr.avail_in = nLen;
737 0 : aZStr.zalloc = ( alloc_func )0;
738 0 : aZStr.zfree = ( free_func )0;
739 0 : aZStr.opaque = ( voidpf )0;
740 :
741 0 : int err = inflateInit(&aZStr);
742 :
743 0 : const unsigned int buf_increment_size = 16384;
744 :
745 0 : *pOutBuf = (sal_uInt8*)rtl_reallocateMemory( *pOutBuf, buf_increment_size );
746 0 : aZStr.next_out = (Bytef*)*pOutBuf;
747 0 : aZStr.avail_out = buf_increment_size;
748 0 : *pOutLen = buf_increment_size;
749 0 : while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
750 : {
751 0 : err = inflate( &aZStr, Z_NO_FLUSH );
752 0 : if( aZStr.avail_out == 0 )
753 : {
754 0 : if( err != Z_STREAM_END )
755 : {
756 0 : const int nNewAlloc = *pOutLen + buf_increment_size;
757 0 : *pOutBuf = (sal_uInt8*)rtl_reallocateMemory( *pOutBuf, nNewAlloc );
758 0 : aZStr.next_out = (Bytef*)(*pOutBuf + *pOutLen);
759 0 : aZStr.avail_out = buf_increment_size;
760 0 : *pOutLen = nNewAlloc;
761 : }
762 : }
763 : }
764 0 : if( err == Z_STREAM_END )
765 : {
766 0 : if( aZStr.avail_out > 0 )
767 0 : *pOutLen -= aZStr.avail_out;;
768 : }
769 0 : inflateEnd(&aZStr);
770 0 : if( err < Z_OK )
771 : {
772 0 : rtl_freeMemory( *pOutBuf );
773 0 : *pOutBuf = NULL;
774 0 : *pOutLen = 0;
775 : }
776 0 : }
777 :
778 0 : bool PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
779 : {
780 0 : bool bSuccess = false;
781 0 : if( m_pStream )
782 : {
783 0 : char* pStream = NULL;
784 0 : unsigned int nBytes = 0;
785 0 : if( getDeflatedStream( &pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
786 : {
787 0 : sal_uInt8* pOutBytes = NULL;
788 0 : sal_uInt32 nOutBytes = 0;
789 0 : unzipToBuffer( pStream, nBytes, &pOutBytes, &nOutBytes );
790 0 : rWriteContext.write( pOutBytes, nOutBytes );
791 0 : rtl_freeMemory( pOutBytes );
792 : }
793 0 : else if( pStream && nBytes )
794 0 : rWriteContext.write( pStream, nBytes );
795 0 : rtl_freeMemory( pStream );
796 : }
797 0 : return bSuccess;
798 : }
799 :
800 0 : bool PDFObject::emit( EmitContext& rWriteContext ) const
801 : {
802 0 : if( ! rWriteContext.write( "\n", 1 ) )
803 0 : return false;
804 :
805 0 : EmitImplData* pEData = getEmitData( rWriteContext );
806 0 : if( pEData )
807 0 : pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
808 :
809 0 : OStringBuffer aBuf( 32 );
810 0 : aBuf.append( sal_Int32( m_nNumber ) );
811 0 : aBuf.append( ' ' );
812 0 : aBuf.append( sal_Int32( m_nGeneration ) );
813 0 : aBuf.append( " obj\n" );
814 0 : if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
815 0 : return false;
816 :
817 0 : if( pEData )
818 0 : pEData->setDecryptObject( m_nNumber, m_nGeneration );
819 0 : if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
820 : {
821 0 : char* pStream = NULL;
822 0 : unsigned int nBytes = 0;
823 0 : bool bDeflate = getDeflatedStream( &pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
824 0 : if( pStream && nBytes )
825 : {
826 : // unzip the stream
827 0 : sal_uInt8* pOutBytes = NULL;
828 0 : sal_uInt32 nOutBytes = 0;
829 0 : if( bDeflate && rWriteContext.m_bDeflate )
830 0 : unzipToBuffer( pStream, nBytes, &pOutBytes, &nOutBytes );
831 : else
832 : {
833 : // nothing to deflate, but decryption has happened
834 0 : pOutBytes = (sal_uInt8*)pStream;
835 0 : nOutBytes = (sal_uInt32)nBytes;
836 : }
837 :
838 0 : if( nOutBytes )
839 : {
840 : // clone this object
841 0 : PDFObject* pClone = static_cast<PDFObject*>(clone());
842 : // set length in the dictionary to new stream length
843 0 : PDFNumber* pNewLen = new PDFNumber( double(nOutBytes) );
844 0 : pClone->m_pStream->m_pDict->insertValue( "Length", pNewLen );
845 :
846 0 : if( bDeflate && rWriteContext.m_bDeflate )
847 : {
848 : // delete flatedecode filter
849 : boost::unordered_map<OString,PDFEntry*,OStringHash>::const_iterator it =
850 0 : pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
851 0 : if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
852 : {
853 0 : PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
854 0 : if (pFilter && pFilter->m_aName.equalsL(RTL_CONSTASCII_STRINGPARAM("FlateDecode")))
855 0 : pClone->m_pStream->m_pDict->eraseValue( "Filter" );
856 : else
857 : {
858 0 : PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
859 0 : if( pArray && ! pArray->m_aSubElements.empty() )
860 : {
861 0 : pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front());
862 0 : if (pFilter && pFilter->m_aName.equalsL(RTL_CONSTASCII_STRINGPARAM("FlateDecode")))
863 : {
864 0 : delete pFilter;
865 0 : pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
866 : }
867 : }
868 : }
869 : }
870 : }
871 :
872 : // write sub elements except stream
873 0 : bool bRet = true;
874 0 : unsigned int nEle = pClone->m_aSubElements.size();
875 0 : for( unsigned int i = 0; i < nEle && bRet; i++ )
876 : {
877 0 : if( pClone->m_aSubElements[i] != pClone->m_pStream )
878 0 : bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
879 : }
880 0 : delete pClone;
881 : // write stream
882 0 : if( bRet )
883 0 : rWriteContext.write( "stream\n", 7 );
884 0 : if( bRet )
885 0 : bRet = rWriteContext.write( pOutBytes, nOutBytes );
886 0 : if( bRet )
887 0 : bRet = rWriteContext.write( "\nendstream\nendobj\n", 18 );
888 0 : rtl_freeMemory( pStream );
889 0 : if( pOutBytes != (sal_uInt8*)pStream )
890 0 : rtl_freeMemory( pOutBytes );
891 0 : if( pEData )
892 0 : pEData->setDecryptObject( 0, 0 );
893 0 : return bRet;
894 : }
895 0 : if( pOutBytes != (sal_uInt8*)pStream )
896 0 : rtl_freeMemory( pOutBytes );
897 : }
898 0 : rtl_freeMemory( pStream );
899 : }
900 :
901 0 : bool bRet = emitSubElements( rWriteContext ) &&
902 0 : rWriteContext.write( "\nendobj\n", 8 );
903 0 : if( pEData )
904 0 : pEData->setDecryptObject( 0, 0 );
905 0 : return bRet;
906 : }
907 :
908 0 : PDFEntry* PDFObject::clone() const
909 : {
910 0 : PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
911 0 : cloneSubElements( pNewOb->m_aSubElements );
912 0 : unsigned int nEle = m_aSubElements.size();
913 0 : for( unsigned int i = 0; i < nEle; i++ )
914 : {
915 0 : if( m_aSubElements[i] == m_pObject )
916 0 : pNewOb->m_pObject = pNewOb->m_aSubElements[i];
917 0 : else if( m_aSubElements[i] == m_pStream && pNewOb->m_pObject )
918 : {
919 0 : pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i]);
920 0 : PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
921 0 : if( pNewDict )
922 0 : pNewOb->m_pStream->m_pDict = pNewDict;
923 : }
924 : }
925 0 : return pNewOb;
926 : }
927 :
928 6 : PDFTrailer::~PDFTrailer()
929 : {
930 6 : }
931 :
932 0 : bool PDFTrailer::emit( EmitContext& rWriteContext ) const
933 : {
934 : // get xref offset
935 0 : unsigned int nXRefPos = rWriteContext.getCurPos();
936 : // begin xref section, object 0 is always free
937 0 : if( ! rWriteContext.write( "xref\r\n"
938 : "0 1\r\n"
939 0 : "0000000000 65535 f\r\n", 31 ) )
940 0 : return false;
941 : // check if we are emitting a complete PDF file
942 0 : EmitImplData* pEData = getEmitData( rWriteContext );
943 0 : if( pEData )
944 : {
945 : // emit object xrefs
946 0 : const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
947 0 : EmitImplData::XRefTable::const_iterator section_begin, section_end;
948 0 : section_begin = rXRefs.begin();
949 0 : while( section_begin != rXRefs.end() )
950 : {
951 : // find end of continuous object numbers
952 0 : section_end = section_begin;
953 0 : unsigned int nLast = section_begin->first;
954 0 : while( (++section_end) != rXRefs.end() &&
955 0 : section_end->first == nLast+1 )
956 0 : nLast = section_end->first;
957 : // write first object number and number of following entries
958 0 : OStringBuffer aBuf( 21 );
959 0 : aBuf.append( sal_Int32( section_begin->first ) );
960 0 : aBuf.append( ' ' );
961 0 : aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
962 0 : aBuf.append( "\r\n" );
963 0 : if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
964 0 : return false;
965 0 : while( section_begin != section_end )
966 : {
967 : // write 20 char entry of form
968 : // 0000offset 00gen n\r\n
969 0 : aBuf.setLength( 0 );
970 0 : OString aOffset( OString::valueOf( sal_Int64(section_begin->second.second ) ) );
971 0 : int nPad = 10 - aOffset.getLength();
972 0 : for( int i = 0; i < nPad; i++ )
973 0 : aBuf.append( '0' );
974 0 : aBuf.append( aOffset );
975 0 : aBuf.append( ' ' );
976 0 : OString aGeneration( OString::valueOf( sal_Int32(section_begin->second.first ) ) );
977 0 : nPad = 5 - aGeneration.getLength();
978 0 : for( int i = 0; i < nPad; i++ )
979 0 : aBuf.append( '0' );
980 0 : aBuf.append( aGeneration );
981 0 : aBuf.append( " n\r\n" );
982 0 : if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
983 0 : return false;
984 0 : ++section_begin;
985 0 : }
986 0 : }
987 : }
988 0 : if( ! rWriteContext.write( "trailer\n", 8 ) )
989 0 : return false;
990 0 : if( ! emitSubElements( rWriteContext ) )
991 0 : return false;
992 0 : if( ! rWriteContext.write( "startxref\n", 10 ) )
993 0 : return false;
994 0 : OString aOffset( OString::valueOf( sal_Int32(nXRefPos) ) );
995 0 : if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
996 0 : return false;
997 0 : return rWriteContext.write( "\n%%EOF\n", 7 );
998 : }
999 :
1000 0 : PDFEntry* PDFTrailer::clone() const
1001 : {
1002 0 : PDFTrailer* pNewTr = new PDFTrailer();
1003 0 : cloneSubElements( pNewTr->m_aSubElements );
1004 0 : unsigned int nEle = m_aSubElements.size();
1005 0 : for( unsigned int i = 0; i < nEle; i++ )
1006 : {
1007 0 : if( m_aSubElements[i] == m_pDict )
1008 : {
1009 0 : pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i]);
1010 0 : break;
1011 : }
1012 : }
1013 0 : return pNewTr;
1014 : }
1015 :
1016 : #define ENCRYPTION_KEY_LEN 16
1017 : #define ENCRYPTION_BUF_LEN 32
1018 :
1019 : namespace pdfparse {
1020 : struct PDFFileImplData
1021 : {
1022 : bool m_bIsEncrypted;
1023 : bool m_bStandardHandler;
1024 : sal_uInt32 m_nAlgoVersion;
1025 : sal_uInt32 m_nStandardRevision;
1026 : sal_uInt32 m_nKeyLength;
1027 : sal_uInt8 m_aOEntry[32];
1028 : sal_uInt8 m_aUEntry[32];
1029 : sal_uInt32 m_nPEntry;
1030 : OString m_aDocID;
1031 : rtlCipher m_aCipher;
1032 : rtlDigest m_aDigest;
1033 :
1034 : sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5]; // maximum handled key length
1035 :
1036 3 : PDFFileImplData() :
1037 : m_bIsEncrypted( false ),
1038 : m_bStandardHandler( false ),
1039 : m_nAlgoVersion( 0 ),
1040 : m_nStandardRevision( 0 ),
1041 : m_nKeyLength( 0 ),
1042 : m_nPEntry( 0 ),
1043 : m_aCipher( NULL ),
1044 3 : m_aDigest( NULL )
1045 : {
1046 3 : memset( m_aOEntry, 0, sizeof( m_aOEntry ) );
1047 3 : memset( m_aUEntry, 0, sizeof( m_aUEntry ) );
1048 3 : memset( m_aDecryptionKey, 0, sizeof( m_aDecryptionKey ) );
1049 3 : }
1050 :
1051 3 : ~PDFFileImplData()
1052 3 : {
1053 3 : if( m_aCipher )
1054 0 : rtl_cipher_destroyARCFOUR( m_aCipher );
1055 3 : if( m_aDigest )
1056 0 : rtl_digest_destroyMD5( m_aDigest );
1057 3 : }
1058 : };
1059 : }
1060 :
1061 9 : PDFFile::~PDFFile()
1062 : {
1063 3 : if( m_pData )
1064 3 : delete m_pData;
1065 6 : }
1066 :
1067 3 : bool PDFFile::isEncrypted() const
1068 : {
1069 3 : return impl_getData()->m_bIsEncrypted;
1070 : }
1071 :
1072 0 : bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1073 : unsigned int nObject, unsigned int nGeneration ) const
1074 : {
1075 0 : if( ! isEncrypted() )
1076 0 : return false;
1077 :
1078 0 : if( ! m_pData->m_aCipher )
1079 0 : m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1080 :
1081 : // modify encryption key
1082 0 : sal_uInt32 i = m_pData->m_nKeyLength;
1083 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1084 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1085 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1086 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1087 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1088 :
1089 : sal_uInt8 aSum[ENCRYPTION_KEY_LEN];
1090 0 : rtl_digest_updateMD5( m_pData->m_aDigest, m_pData->m_aDecryptionKey, i );
1091 0 : rtl_digest_getMD5( m_pData->m_aDigest, aSum, sizeof( aSum ) );
1092 :
1093 0 : if( i > 16 )
1094 0 : i = 16;
1095 :
1096 : rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1097 : rtl_Cipher_DirectionDecode,
1098 : aSum, i,
1099 0 : NULL, 0 );
1100 0 : if( aErr == rtl_Cipher_E_None )
1101 : aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1102 : pInBuffer, nLen,
1103 0 : pOutBuffer, nLen );
1104 0 : return aErr == rtl_Cipher_E_None;
1105 : }
1106 :
1107 : static const sal_uInt8 nPadString[32] =
1108 : {
1109 : 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1110 : 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
1111 : };
1112 :
1113 0 : static void pad_or_truncate_to_32( const OString& rStr, sal_Char* pBuffer )
1114 : {
1115 0 : int nLen = rStr.getLength();
1116 0 : if( nLen > 32 )
1117 0 : nLen = 32;
1118 0 : const sal_Char* pStr = rStr.getStr();
1119 0 : memcpy( pBuffer, pStr, nLen );
1120 0 : int i = 0;
1121 0 : while( nLen < 32 )
1122 0 : pBuffer[nLen++] = nPadString[i++];
1123 0 : }
1124 :
1125 : // pass at least pData->m_nKeyLength bytes in
1126 0 : static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData* pData, bool bComputeO )
1127 : {
1128 : // see PDF reference 1.4 Algorithm 3.2
1129 : // encrypt pad string
1130 : sal_Char aPadPwd[ENCRYPTION_BUF_LEN];
1131 0 : pad_or_truncate_to_32( rPwd, aPadPwd );
1132 0 : rtl_digest_updateMD5( pData->m_aDigest, aPadPwd, sizeof( aPadPwd ) );
1133 0 : if( ! bComputeO )
1134 : {
1135 0 : rtl_digest_updateMD5( pData->m_aDigest, pData->m_aOEntry, 32 );
1136 : sal_uInt8 aPEntry[4];
1137 0 : aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1138 0 : aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1139 0 : aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1140 0 : aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1141 0 : rtl_digest_updateMD5( pData->m_aDigest, aPEntry, sizeof(aPEntry) );
1142 0 : rtl_digest_updateMD5( pData->m_aDigest, pData->m_aDocID.getStr(), pData->m_aDocID.getLength() );
1143 : }
1144 : sal_uInt8 nSum[RTL_DIGEST_LENGTH_MD5];
1145 0 : rtl_digest_getMD5( pData->m_aDigest, nSum, sizeof(nSum) );
1146 0 : if( pData->m_nStandardRevision == 3 )
1147 : {
1148 0 : for( int i = 0; i < 50; i++ )
1149 : {
1150 0 : rtl_digest_updateMD5( pData->m_aDigest, nSum, sizeof(nSum) );
1151 0 : rtl_digest_getMD5( pData->m_aDigest, nSum, sizeof(nSum) );
1152 : }
1153 : }
1154 0 : sal_uInt32 nLen = pData->m_nKeyLength;
1155 0 : if( nLen > RTL_DIGEST_LENGTH_MD5 )
1156 0 : nLen = RTL_DIGEST_LENGTH_MD5;
1157 0 : memcpy( pOutKey, nSum, nLen );
1158 0 : return nLen;
1159 : }
1160 :
1161 0 : static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1162 : {
1163 : // see PDF reference 1.4 Algorithm 3.6
1164 0 : bool bValid = false;
1165 : sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1166 : sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN];
1167 0 : memset( nEncryptedEntry, 0, sizeof(nEncryptedEntry) );
1168 0 : sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1169 : // save (at this time potential) decryption key for later use
1170 0 : memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1171 0 : if( pData->m_nStandardRevision == 2 )
1172 : {
1173 : // see PDF reference 1.4 Algorithm 3.4
1174 : // encrypt pad string
1175 : rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1176 : aKey, nKeyLen,
1177 0 : NULL, 0 );
1178 : rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1179 0 : nEncryptedEntry, sizeof( nEncryptedEntry ) );
1180 0 : bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1181 : }
1182 0 : else if( pData->m_nStandardRevision == 3 )
1183 : {
1184 : // see PDF reference 1.4 Algorithm 3.5
1185 0 : rtl_digest_updateMD5( pData->m_aDigest, nPadString, sizeof( nPadString ) );
1186 0 : rtl_digest_updateMD5( pData->m_aDigest, pData->m_aDocID.getStr(), pData->m_aDocID.getLength() );
1187 0 : rtl_digest_getMD5( pData->m_aDigest, nEncryptedEntry, sizeof(nEncryptedEntry) );
1188 : rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1189 0 : aKey, sizeof(aKey), NULL, 0 );
1190 : rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1191 : nEncryptedEntry, 16,
1192 0 : nEncryptedEntry, 16 ); // encrypt in place
1193 0 : for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1194 : {
1195 : sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1196 0 : for( sal_uInt32 j = 0; j < sizeof(aTempKey); j++ )
1197 0 : aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1198 :
1199 : rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1200 0 : aTempKey, sizeof(aTempKey), NULL, 0 );
1201 : rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1202 : nEncryptedEntry, 16,
1203 0 : nEncryptedEntry, 16 ); // encrypt in place
1204 : }
1205 0 : bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 16 ) == 0);
1206 : }
1207 0 : return bValid;
1208 : }
1209 :
1210 0 : bool PDFFile::usesSupportedEncryptionFormat() const
1211 : {
1212 0 : return m_pData->m_bStandardHandler &&
1213 0 : m_pData->m_nAlgoVersion >= 1 &&
1214 0 : m_pData->m_nAlgoVersion <= 2 &&
1215 0 : m_pData->m_nStandardRevision >= 2 &&
1216 0 : m_pData->m_nStandardRevision <= 3;
1217 : }
1218 :
1219 0 : bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1220 : {
1221 0 : if( !impl_getData()->m_bIsEncrypted )
1222 0 : return rPwd.isEmpty();
1223 :
1224 : // check if we can handle this encryption at all
1225 0 : if( ! usesSupportedEncryptionFormat() )
1226 0 : return false;
1227 :
1228 0 : if( ! m_pData->m_aCipher )
1229 0 : m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1230 0 : if( ! m_pData->m_aDigest )
1231 0 : m_pData->m_aDigest = rtl_digest_createMD5();
1232 :
1233 : // first try user password
1234 0 : bool bValid = check_user_password( rPwd, m_pData );
1235 :
1236 0 : if( ! bValid )
1237 : {
1238 : // try owner password
1239 : // see PDF reference 1.4 Algorithm 3.7
1240 : sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1241 : sal_uInt8 nPwd[ENCRYPTION_BUF_LEN];
1242 0 : memset( nPwd, 0, sizeof(nPwd) );
1243 0 : sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData, true );
1244 0 : if( m_pData->m_nStandardRevision == 2 )
1245 : {
1246 : rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1247 0 : aKey, nKeyLen, NULL, 0 );
1248 : rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1249 : m_pData->m_aOEntry, 32,
1250 0 : nPwd, 32 );
1251 : }
1252 0 : else if( m_pData->m_nStandardRevision == 3 )
1253 : {
1254 0 : memcpy( nPwd, m_pData->m_aOEntry, 32 );
1255 0 : for( int i = 19; i >= 0; i-- )
1256 : {
1257 : sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1258 0 : for( unsigned int j = 0; j < sizeof(nTempKey); j++ )
1259 0 : nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1260 : rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1261 0 : nTempKey, nKeyLen, NULL, 0 );
1262 : rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1263 : nPwd, 32,
1264 0 : nPwd, 32 ); // decrypt inplace
1265 : }
1266 : }
1267 0 : bValid = check_user_password( OString( (sal_Char*)nPwd, 32 ), m_pData );
1268 : }
1269 :
1270 0 : return bValid;
1271 : }
1272 :
1273 0 : OUString PDFFile::getDecryptionKey() const
1274 : {
1275 0 : OUStringBuffer aBuf( ENCRYPTION_KEY_LEN * 2 );
1276 0 : if( impl_getData()->m_bIsEncrypted )
1277 : {
1278 0 : for( sal_uInt32 i = 0; i < m_pData->m_nKeyLength; i++ )
1279 : {
1280 : static const sal_Unicode pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
1281 : '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
1282 0 : aBuf.append( pHexTab[(m_pData->m_aDecryptionKey[i] >> 4) & 0x0f] );
1283 0 : aBuf.append( pHexTab[(m_pData->m_aDecryptionKey[i] & 0x0f)] );
1284 : }
1285 :
1286 : }
1287 0 : return aBuf.makeStringAndClear();
1288 : }
1289 :
1290 3 : PDFFileImplData* PDFFile::impl_getData() const
1291 : {
1292 3 : if( m_pData )
1293 0 : return m_pData;
1294 3 : m_pData = new PDFFileImplData();
1295 : // check for encryption dict in a trailer
1296 3 : unsigned int nElements = m_aSubElements.size();
1297 87 : while( nElements-- > 0 )
1298 : {
1299 81 : PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements]);
1300 81 : if( pTrailer && pTrailer->m_pDict )
1301 : {
1302 : // search doc id
1303 3 : PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1304 3 : if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1305 : {
1306 3 : PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1307 3 : if( pArr && pArr->m_aSubElements.size() > 0 )
1308 : {
1309 3 : PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0]);
1310 3 : if( pStr )
1311 3 : m_pData->m_aDocID = pStr->getFilteredString();
1312 : #if OSL_DEBUG_LEVEL > 1
1313 : OUString aTmp;
1314 : for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1315 : aTmp += OUString::number((unsigned int)sal_uInt8(m_pData->m_aDocID.getStr()[i]), 16);
1316 : SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << OUStringToOString(aTmp, RTL_TEXTENCODING_UTF8).getStr() << ">");
1317 : #endif
1318 : }
1319 : }
1320 : // search Encrypt entry
1321 : PDFDict::Map::iterator enc =
1322 3 : pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1323 3 : if( enc != pTrailer->m_pDict->m_aMap.end() )
1324 : {
1325 0 : PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1326 0 : if( ! pDict )
1327 : {
1328 0 : PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1329 0 : if( pRef )
1330 : {
1331 0 : PDFObject* pObj = findObject( pRef );
1332 0 : if( pObj && pObj->m_pObject )
1333 0 : pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1334 : }
1335 : }
1336 0 : if( pDict )
1337 : {
1338 0 : PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1339 0 : PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1340 0 : PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1341 0 : PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1342 0 : PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1343 0 : PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1344 0 : PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1345 0 : if( filter != pDict->m_aMap.end() )
1346 : {
1347 0 : m_pData->m_bIsEncrypted = true;
1348 0 : m_pData->m_nKeyLength = 5;
1349 0 : if( version != pDict->m_aMap.end() )
1350 : {
1351 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1352 0 : if( pNum )
1353 0 : m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1354 : }
1355 0 : if( m_pData->m_nAlgoVersion >= 3 )
1356 0 : m_pData->m_nKeyLength = 16;
1357 0 : if( len != pDict->m_aMap.end() )
1358 : {
1359 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1360 0 : if( pNum )
1361 0 : m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1362 : }
1363 0 : PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1364 0 : if( pFilter && pFilter->getFilteredName() == "Standard" )
1365 0 : m_pData->m_bStandardHandler = true;
1366 0 : if( o_ent != pDict->m_aMap.end() )
1367 : {
1368 0 : PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1369 0 : if( pString )
1370 : {
1371 0 : OString aEnt = pString->getFilteredString();
1372 0 : if( aEnt.getLength() == 32 )
1373 0 : memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1374 : #if OSL_DEBUG_LEVEL > 1
1375 : else
1376 : {
1377 : OUString aTmp;
1378 : for( int i = 0; i < aEnt.getLength(); i++ )
1379 : aTmp += " " + OUString::number((unsigned int)sal_uInt8(aEnt.getStr()[i]), 16);
1380 : SAL_WARN("sdext.pdfimport.pdfparse",
1381 : "O entry has length " << (int)aEnt.getLength() << ", should be 32 <" << OUStringToOString(aTmp, RTL_TEXTENCODING_UTF8).getStr() << ">" );
1382 : }
1383 : #endif
1384 : }
1385 : }
1386 0 : if( u_ent != pDict->m_aMap.end() )
1387 : {
1388 0 : PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1389 0 : if( pString )
1390 : {
1391 0 : OString aEnt = pString->getFilteredString();
1392 0 : if( aEnt.getLength() == 32 )
1393 0 : memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1394 : #if OSL_DEBUG_LEVEL > 1
1395 : else
1396 : {
1397 : OUString aTmp;
1398 : for( int i = 0; i < aEnt.getLength(); i++ )
1399 : aTmp += " " + OUString::number((unsigned int)sal_uInt8(aEnt.getStr()[i]), 16);
1400 : SAL_WARN("sdext.pdfimport.pdfparse",
1401 : "U entry has length " << (int)aEnt.getLength() << ", should be 32 <" << OUStringToOString(aTmp, RTL_TEXTENCODING_UTF8).getStr() << ">" );
1402 : }
1403 : #endif
1404 : }
1405 : }
1406 0 : if( r_ent != pDict->m_aMap.end() )
1407 : {
1408 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1409 0 : if( pNum )
1410 0 : m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1411 : }
1412 0 : if( p_ent != pDict->m_aMap.end() )
1413 : {
1414 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1415 0 : if( pNum )
1416 0 : m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1417 : SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1418 : }
1419 :
1420 : SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? OUStringToOString( pFilter->getFilteredName(), RTL_TEXTENCODING_UTF8 ).getStr() : "<unknown>") << ", version = " << (int)m_pData->m_nAlgoVersion << ", revision = " << (int)m_pData->m_nStandardRevision << ", key length = " << m_pData->m_nKeyLength );
1421 0 : break;
1422 : }
1423 : }
1424 : }
1425 : }
1426 : }
1427 :
1428 3 : return m_pData;
1429 : }
1430 :
1431 0 : bool PDFFile::emit( EmitContext& rWriteContext ) const
1432 : {
1433 0 : setEmitData( rWriteContext, new EmitImplData( this ) );
1434 :
1435 0 : OStringBuffer aBuf( 32 );
1436 0 : aBuf.append( "%PDF-" );
1437 0 : aBuf.append( sal_Int32( m_nMajor ) );
1438 0 : aBuf.append( '.' );
1439 0 : aBuf.append( sal_Int32( m_nMinor ) );
1440 0 : aBuf.append( "\n" );
1441 0 : if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1442 0 : return false;
1443 0 : return emitSubElements( rWriteContext );
1444 : }
1445 :
1446 0 : PDFEntry* PDFFile::clone() const
1447 : {
1448 0 : PDFFile* pNewFl = new PDFFile();
1449 0 : pNewFl->m_nMajor = m_nMajor;
1450 0 : pNewFl->m_nMinor = m_nMinor;
1451 0 : cloneSubElements( pNewFl->m_aSubElements );
1452 0 : return pNewFl;
1453 : }
1454 :
1455 0 : PDFPart::~PDFPart()
1456 : {
1457 0 : }
1458 :
1459 0 : bool PDFPart::emit( EmitContext& rWriteContext ) const
1460 : {
1461 0 : return emitSubElements( rWriteContext );
1462 : }
1463 :
1464 0 : PDFEntry* PDFPart::clone() const
1465 : {
1466 0 : PDFPart* pNewPt = new PDFPart();
1467 0 : cloneSubElements( pNewPt->m_aSubElements );
1468 0 : return pNewPt;
1469 : }
1470 :
1471 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|