Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 :
21 : #include <pdfparse.hxx>
22 :
23 : #include <rtl/strbuf.hxx>
24 : #include <rtl/ustring.hxx>
25 : #include <rtl/ustrbuf.hxx>
26 : #include <rtl/alloc.h>
27 : #include <rtl/digest.h>
28 : #include <rtl/cipher.h>
29 :
30 : #include <zlib.h>
31 :
32 : #include <math.h>
33 : #include <map>
34 :
35 : #include <string.h>
36 :
37 :
38 :
39 : namespace pdfparse
40 : {
41 :
42 : struct EmitImplData
43 : {
44 : // xref table: maps object number to a pair of (generation, buffer offset)
45 : typedef std::map< unsigned int, std::pair< unsigned int, unsigned int > > XRefTable;
46 : XRefTable m_aXRefTable;
47 : // container of all indirect objects (usually a PDFFile*)
48 : const PDFContainer* m_pObjectContainer;
49 : unsigned int m_nDecryptObject;
50 : unsigned int m_nDecryptGeneration;
51 :
52 : // returns true if the xref table was updated
53 0 : bool insertXref( unsigned int nObject, unsigned int nGeneration, unsigned int nOffset )
54 : {
55 0 : XRefTable::iterator it = m_aXRefTable.find( nObject );
56 0 : if( it == m_aXRefTable.end() )
57 : {
58 : // new entry
59 0 : m_aXRefTable[ nObject ] = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
60 0 : return true;
61 : }
62 : // update old entry, if generation number is higher
63 0 : if( it->second.first < nGeneration )
64 : {
65 0 : it->second = std::pair<unsigned int, unsigned int>(nGeneration,nOffset);
66 0 : return true;
67 : }
68 0 : return false;
69 : }
70 :
71 0 : EmitImplData( const PDFContainer* pTopContainer ) :
72 : m_pObjectContainer( pTopContainer ),
73 : m_nDecryptObject( 0 ),
74 0 : m_nDecryptGeneration( 0 )
75 0 : {}
76 0 : ~EmitImplData() {}
77 0 : bool decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
78 : unsigned int nObject, unsigned int nGeneration ) const
79 : {
80 0 : const PDFFile* pFile = dynamic_cast<const PDFFile*>(m_pObjectContainer);
81 0 : return pFile && pFile->decrypt( pInBuffer, nLen, pOutBuffer, nObject, nGeneration );
82 : }
83 :
84 0 : void setDecryptObject( unsigned int nObject, unsigned int nGeneration )
85 : {
86 0 : m_nDecryptObject = nObject;
87 0 : m_nDecryptGeneration = nGeneration;
88 0 : }
89 : };
90 :
91 : }
92 :
93 : using namespace pdfparse;
94 :
95 0 : EmitContext::EmitContext( const PDFContainer* pTop ) :
96 : m_bDeflate( false ),
97 : m_bDecrypt( false ),
98 0 : m_pImplData( NULL )
99 : {
100 0 : if( pTop )
101 0 : m_pImplData = new EmitImplData( pTop );
102 0 : }
103 :
104 0 : EmitContext::~EmitContext()
105 : {
106 0 : delete m_pImplData;
107 0 : }
108 :
109 1590 : PDFEntry::~PDFEntry()
110 : {
111 1590 : }
112 :
113 0 : EmitImplData* PDFEntry::getEmitData( EmitContext& rContext )
114 : {
115 0 : return rContext.m_pImplData;
116 : }
117 :
118 0 : void PDFEntry::setEmitData( EmitContext& rContext, EmitImplData* pNewEmitData )
119 : {
120 0 : if( rContext.m_pImplData && rContext.m_pImplData != pNewEmitData )
121 0 : delete rContext.m_pImplData;
122 0 : rContext.m_pImplData = pNewEmitData;
123 0 : }
124 :
125 1264 : PDFValue::~PDFValue()
126 : {
127 1264 : }
128 :
129 10 : PDFComment::~PDFComment()
130 : {
131 10 : }
132 :
133 0 : bool PDFComment::emit( EmitContext& rWriteContext ) const
134 : {
135 0 : return rWriteContext.write( m_aComment.getStr(), m_aComment.getLength() );
136 : }
137 :
138 0 : PDFEntry* PDFComment::clone() const
139 : {
140 0 : return new PDFComment( m_aComment );
141 : }
142 :
143 1380 : PDFName::~PDFName()
144 : {
145 1380 : }
146 :
147 0 : bool PDFName::emit( EmitContext& rWriteContext ) const
148 : {
149 0 : if( ! rWriteContext.write( " /", 2 ) )
150 0 : return false;
151 0 : return rWriteContext.write( m_aName.getStr(), m_aName.getLength() );
152 : }
153 :
154 0 : PDFEntry* PDFName::clone() const
155 : {
156 0 : return new PDFName( m_aName );
157 : }
158 :
159 0 : OUString PDFName::getFilteredName() const
160 : {
161 0 : OStringBuffer aFilter( m_aName.getLength() );
162 0 : const sal_Char* pStr = m_aName.getStr();
163 0 : unsigned int nLen = m_aName.getLength();
164 0 : for( unsigned int i = 0; i < nLen; i++ )
165 : {
166 0 : if( (i < nLen - 3) && pStr[i] == '#' )
167 : {
168 0 : sal_Char rResult = 0;
169 0 : i++;
170 0 : if( pStr[i] >= '0' && pStr[i] <= '9' )
171 0 : rResult = sal_Char( pStr[i]-'0' ) << 4;
172 0 : else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
173 0 : rResult = sal_Char( pStr[i]-'a' + 10 ) << 4;
174 0 : else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
175 0 : rResult = sal_Char( pStr[i]-'A' + 10 ) << 4;
176 0 : i++;
177 0 : if( pStr[i] >= '0' && pStr[i] <= '9' )
178 0 : rResult |= sal_Char( pStr[i]-'0' );
179 0 : else if( pStr[i] >= 'a' && pStr[i] <= 'f' )
180 0 : rResult |= sal_Char( pStr[i]-'a' + 10 );
181 0 : else if( pStr[i] >= 'A' && pStr[i] <= 'F' )
182 0 : rResult |= sal_Char( pStr[i]-'A' + 10 );
183 0 : aFilter.append( rResult );
184 : }
185 : else
186 0 : aFilter.append( pStr[i] );
187 : }
188 0 : return OStringToOUString( aFilter.makeStringAndClear(), RTL_TEXTENCODING_UTF8 );
189 : }
190 :
191 60 : PDFString::~PDFString()
192 : {
193 60 : }
194 :
195 0 : bool PDFString::emit( EmitContext& rWriteContext ) const
196 : {
197 0 : if( ! rWriteContext.write( " ", 1 ) )
198 0 : return false;
199 0 : EmitImplData* pEData = getEmitData( rWriteContext );
200 0 : if( rWriteContext.m_bDecrypt && pEData && pEData->m_nDecryptObject )
201 : {
202 0 : OString aFiltered( getFilteredString() );
203 : // decrypt inplace (evil since OString is supposed to be const
204 : // however in this case we know that getFilteredString returned a singular string instance
205 0 : pEData->decrypt( reinterpret_cast<sal_uInt8 const *>(aFiltered.getStr()), aFiltered.getLength(),
206 0 : reinterpret_cast<sal_uInt8 *>(const_cast<char *>(aFiltered.getStr())),
207 0 : pEData->m_nDecryptObject, pEData->m_nDecryptGeneration );
208 : // check for string or hex string
209 0 : const sal_Char* pStr = aFiltered.getStr();
210 0 : if( aFiltered.getLength() > 1 &&
211 0 : ( ((unsigned char)pStr[0] == 0xff && (unsigned char)pStr[1] == 0xfe) ||
212 0 : ((unsigned char)pStr[0] == 0xfe && (unsigned char)pStr[1] == 0xff) ) )
213 : {
214 : static const char pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
215 : '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
216 0 : if( ! rWriteContext.write( "<", 1 ) )
217 0 : return false;
218 0 : for( sal_Int32 i = 0; i < aFiltered.getLength(); i++ )
219 : {
220 0 : if( ! rWriteContext.write( pHexTab + ((sal_uInt32(pStr[i]) >> 4) & 0x0f), 1 ) )
221 0 : return false;
222 0 : if( ! rWriteContext.write( pHexTab + (sal_uInt32(pStr[i]) & 0x0f), 1 ) )
223 0 : return false;
224 : }
225 0 : if( ! rWriteContext.write( ">", 1 ) )
226 0 : return false;
227 : }
228 : else
229 : {
230 0 : if( ! rWriteContext.write( "(", 1 ) )
231 0 : return false;
232 0 : if( ! rWriteContext.write( aFiltered.getStr(), aFiltered.getLength() ) )
233 0 : return false;
234 0 : if( ! rWriteContext.write( ")", 1 ) )
235 0 : return false;
236 : }
237 0 : return true;
238 : }
239 0 : return rWriteContext.write( m_aString.getStr(), m_aString.getLength() );
240 : }
241 :
242 0 : PDFEntry* PDFString::clone() const
243 : {
244 0 : return new PDFString( m_aString );
245 : }
246 :
247 5 : OString PDFString::getFilteredString() const
248 : {
249 5 : int nLen = m_aString.getLength();
250 5 : OStringBuffer aBuf( nLen );
251 :
252 5 : const sal_Char* pStr = m_aString.getStr();
253 5 : if( *pStr == '(' )
254 : {
255 0 : const sal_Char* pRun = pStr+1;
256 0 : while( pRun - pStr < nLen-1 )
257 : {
258 0 : if( *pRun == '\\' )
259 : {
260 0 : pRun++;
261 0 : if( pRun - pStr < nLen )
262 : {
263 0 : sal_Char aEsc = 0;
264 0 : if( *pRun == 'n' )
265 0 : aEsc = '\n';
266 0 : else if( *pRun == 'r' )
267 0 : aEsc = '\r';
268 0 : else if( *pRun == 't' )
269 0 : aEsc = '\t';
270 0 : else if( *pRun == 'b' )
271 0 : aEsc = '\b';
272 0 : else if( *pRun == 'f' )
273 0 : aEsc = '\f';
274 0 : else if( *pRun == '(' )
275 0 : aEsc = '(';
276 0 : else if( *pRun == ')' )
277 0 : aEsc = ')';
278 0 : else if( *pRun == '\\' )
279 0 : aEsc = '\\';
280 0 : else if( *pRun == '\n' )
281 : {
282 0 : pRun++;
283 0 : continue;
284 : }
285 0 : else if( *pRun == '\r' )
286 : {
287 0 : pRun++;
288 0 : if( *pRun == '\n' )
289 0 : pRun++;
290 0 : continue;
291 : }
292 : else
293 : {
294 0 : int i = 0;
295 0 : while( i++ < 3 && *pRun >= '0' && *pRun <= '7' )
296 0 : aEsc = 8*aEsc + (*pRun++ - '0');
297 : // move pointer back to last character of octal sequence
298 0 : pRun--;
299 : }
300 0 : aBuf.append( aEsc );
301 : }
302 : }
303 : else
304 0 : aBuf.append( *pRun );
305 : // move pointer to next character
306 0 : pRun++;
307 : }
308 : }
309 5 : else if( *pStr == '<' )
310 : {
311 5 : const sal_Char* pRun = pStr+1;
312 90 : while( *pRun != '>' && pRun - pStr < nLen )
313 : {
314 80 : sal_Char rResult = 0;
315 80 : if( *pRun >= '0' && *pRun <= '9' )
316 53 : rResult = sal_Char( *pRun-'0' ) << 4;
317 27 : else if( *pRun >= 'a' && *pRun <= 'f' )
318 0 : rResult = sal_Char( *pRun-'a' + 10 ) << 4;
319 27 : else if( *pRun >= 'A' && *pRun <= 'F' )
320 27 : rResult = sal_Char( *pRun-'A' + 10 ) << 4;
321 80 : pRun++;
322 80 : if( *pRun != '>' && pRun - pStr < nLen )
323 : {
324 80 : if( *pRun >= '0' && *pRun <= '9' )
325 56 : rResult |= sal_Char( *pRun-'0' );
326 24 : else if( *pRun >= 'a' && *pRun <= 'f' )
327 0 : rResult |= sal_Char( *pRun-'a' + 10 );
328 24 : else if( *pRun >= 'A' && *pRun <= 'F' )
329 24 : rResult |= sal_Char( *pRun-'A' + 10 );
330 : }
331 80 : pRun++;
332 80 : aBuf.append( rResult );
333 : }
334 : }
335 :
336 5 : return aBuf.makeStringAndClear();
337 : }
338 :
339 814 : PDFNumber::~PDFNumber()
340 : {
341 814 : }
342 :
343 0 : bool PDFNumber::emit( EmitContext& rWriteContext ) const
344 : {
345 0 : OStringBuffer aBuf( 32 );
346 0 : aBuf.append( ' ' );
347 :
348 0 : double fValue = m_fValue;
349 0 : bool bNeg = false;
350 0 : int nPrecision = 5;
351 0 : if( fValue < 0.0 )
352 : {
353 0 : bNeg = true;
354 0 : fValue=-fValue;
355 : }
356 :
357 0 : sal_Int64 nInt = (sal_Int64)fValue;
358 0 : fValue -= (double)nInt;
359 : // optimizing hardware may lead to a value of 1.0 after the subtraction
360 0 : if( fValue == 1.0 || log10( 1.0-fValue ) <= -nPrecision )
361 : {
362 0 : nInt++;
363 0 : fValue = 0.0;
364 : }
365 0 : sal_Int64 nFrac = 0;
366 0 : if( fValue )
367 : {
368 0 : fValue *= pow( 10.0, (double)nPrecision );
369 0 : nFrac = (sal_Int64)fValue;
370 : }
371 0 : if( bNeg && ( nInt || nFrac ) )
372 0 : aBuf.append( '-' );
373 0 : aBuf.append( nInt );
374 0 : if( nFrac )
375 : {
376 : int i;
377 0 : aBuf.append( '.' );
378 0 : sal_Int64 nBound = (sal_Int64)(pow( 10.0, nPrecision - 1.0 )+0.5);
379 0 : for ( i = 0; ( i < nPrecision ) && nFrac; i++ )
380 : {
381 0 : sal_Int64 nNumb = nFrac / nBound;
382 0 : nFrac -= nNumb * nBound;
383 0 : aBuf.append( nNumb );
384 0 : nBound /= 10;
385 : }
386 : }
387 :
388 0 : return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
389 : }
390 :
391 0 : PDFEntry* PDFNumber::clone() const
392 : {
393 0 : return new PDFNumber( m_fValue );
394 : }
395 :
396 :
397 10 : PDFBool::~PDFBool()
398 : {
399 10 : }
400 :
401 0 : bool PDFBool::emit( EmitContext& rWriteContext ) const
402 : {
403 0 : return m_bValue ? rWriteContext.write( " true", 5 ) : rWriteContext.write( " false", 6 );
404 : }
405 :
406 0 : PDFEntry* PDFBool::clone() const
407 : {
408 0 : return new PDFBool( m_bValue );
409 : }
410 :
411 20 : PDFNull::~PDFNull()
412 : {
413 20 : }
414 :
415 0 : bool PDFNull::emit( EmitContext& rWriteContext ) const
416 : {
417 0 : return rWriteContext.write( " null", 5 );
418 : }
419 :
420 0 : PDFEntry* PDFNull::clone() const
421 : {
422 0 : return new PDFNull();
423 : }
424 :
425 :
426 244 : PDFObjectRef::~PDFObjectRef()
427 : {
428 244 : }
429 :
430 0 : bool PDFObjectRef::emit( EmitContext& rWriteContext ) const
431 : {
432 0 : OStringBuffer aBuf( 16 );
433 0 : aBuf.append( ' ' );
434 0 : aBuf.append( sal_Int32( m_nNumber ) );
435 0 : aBuf.append( ' ' );
436 0 : aBuf.append( sal_Int32( m_nGeneration ) );
437 0 : aBuf.append( " R", 2 );
438 0 : return rWriteContext.write( aBuf.getStr(), aBuf.getLength() );
439 : }
440 :
441 0 : PDFEntry* PDFObjectRef::clone() const
442 : {
443 0 : return new PDFObjectRef( m_nNumber, m_nGeneration );
444 : }
445 :
446 574 : PDFContainer::~PDFContainer()
447 : {
448 287 : int nEle = m_aSubElements.size();
449 1872 : for( int i = 0; i < nEle; i++ )
450 1585 : delete m_aSubElements[i];
451 287 : }
452 :
453 0 : bool PDFContainer::emitSubElements( EmitContext& rWriteContext ) const
454 : {
455 0 : int nEle = m_aSubElements.size();
456 0 : for( int i = 0; i < nEle; i++ )
457 : {
458 0 : if( rWriteContext.m_bDecrypt )
459 : {
460 0 : const PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i]);
461 0 : if (pName && pName->m_aName == "Encrypt")
462 : {
463 0 : i++;
464 0 : continue;
465 : }
466 : }
467 0 : if( ! m_aSubElements[i]->emit( rWriteContext ) )
468 0 : return false;
469 : }
470 0 : return true;
471 : }
472 :
473 0 : void PDFContainer::cloneSubElements( std::vector<PDFEntry*>& rNewSubElements ) const
474 : {
475 0 : int nEle = m_aSubElements.size();
476 0 : for( int i = 0; i < nEle; i++ )
477 0 : rNewSubElements.push_back( m_aSubElements[i]->clone() );
478 0 : }
479 :
480 0 : PDFObject* PDFContainer::findObject( unsigned int nNumber, unsigned int nGeneration ) const
481 : {
482 0 : unsigned int nEle = m_aSubElements.size();
483 0 : for( unsigned int i = 0; i < nEle; i++ )
484 : {
485 0 : PDFObject* pObject = dynamic_cast<PDFObject*>(m_aSubElements[i]);
486 0 : if( pObject &&
487 0 : pObject->m_nNumber == nNumber &&
488 0 : pObject->m_nGeneration == nGeneration )
489 : {
490 0 : return pObject;
491 : }
492 : }
493 0 : return NULL;
494 : }
495 :
496 126 : PDFArray::~PDFArray()
497 : {
498 126 : }
499 :
500 0 : bool PDFArray::emit( EmitContext& rWriteContext ) const
501 : {
502 0 : if( ! rWriteContext.write( "[", 1 ) )
503 0 : return false;
504 0 : if( ! emitSubElements( rWriteContext ) )
505 0 : return false;
506 0 : return rWriteContext.write( "]", 1 );
507 : }
508 :
509 0 : PDFEntry* PDFArray::clone() const
510 : {
511 0 : PDFArray* pNewAr = new PDFArray();
512 0 : cloneSubElements( pNewAr->m_aSubElements );
513 0 : return pNewAr;
514 : }
515 :
516 214 : PDFDict::~PDFDict()
517 : {
518 214 : }
519 :
520 0 : bool PDFDict::emit( EmitContext& rWriteContext ) const
521 : {
522 0 : if( ! rWriteContext.write( "<<\n", 3 ) )
523 0 : return false;
524 0 : if( ! emitSubElements( rWriteContext ) )
525 0 : return false;
526 0 : return rWriteContext.write( "\n>>\n", 4 );
527 : }
528 :
529 0 : void PDFDict::insertValue( const OString& rName, PDFEntry* pValue )
530 : {
531 0 : if( ! pValue )
532 0 : eraseValue( rName );
533 :
534 0 : std::unordered_map<OString,PDFEntry*,OStringHash>::iterator it = m_aMap.find( rName );
535 0 : if( it == m_aMap.end() )
536 : {
537 : // new name/value, pair, append it
538 0 : m_aSubElements.push_back( new PDFName( rName ) );
539 0 : m_aSubElements.push_back( pValue );
540 : }
541 : else
542 : {
543 0 : unsigned int nSub = m_aSubElements.size();
544 0 : for( unsigned int i = 0; i < nSub; i++ )
545 0 : if( m_aSubElements[i] == it->second )
546 0 : m_aSubElements[i] = pValue;
547 0 : delete it->second;
548 : }
549 0 : m_aMap[ rName ] = pValue;
550 0 : }
551 :
552 0 : void PDFDict::eraseValue( const OString& rName )
553 : {
554 0 : unsigned int nEle = m_aSubElements.size();
555 0 : for( unsigned int i = 0; i < nEle; i++ )
556 : {
557 0 : PDFName* pName = dynamic_cast<PDFName*>(m_aSubElements[i]);
558 0 : if( pName && pName->m_aName.equals( rName ) )
559 : {
560 0 : for( unsigned int j = i+1; j < nEle; j++ )
561 : {
562 0 : if( dynamic_cast<PDFComment*>(m_aSubElements[j]) == NULL )
563 : {
564 : // free name and value
565 0 : delete m_aSubElements[j];
566 0 : delete m_aSubElements[i];
567 : // remove subelements from vector
568 0 : m_aSubElements.erase( m_aSubElements.begin()+j );
569 0 : m_aSubElements.erase( m_aSubElements.begin()+i );
570 0 : buildMap();
571 0 : return;
572 : }
573 : }
574 : }
575 : }
576 : }
577 :
578 107 : PDFEntry* PDFDict::buildMap()
579 : {
580 : // clear map
581 107 : m_aMap.clear();
582 : // build map
583 107 : unsigned int nEle = m_aSubElements.size();
584 107 : PDFName* pName = NULL;
585 1123 : for( unsigned int i = 0; i < nEle; i++ )
586 : {
587 1016 : if( dynamic_cast<PDFComment*>(m_aSubElements[i]) == NULL )
588 : {
589 1016 : if( pName )
590 : {
591 508 : m_aMap[ pName->m_aName ] = m_aSubElements[i];
592 508 : pName = NULL;
593 : }
594 508 : else if( (pName = dynamic_cast<PDFName*>(m_aSubElements[i])) == NULL )
595 0 : return m_aSubElements[i];
596 : }
597 : }
598 107 : return pName;
599 : }
600 :
601 0 : PDFEntry* PDFDict::clone() const
602 : {
603 0 : PDFDict* pNewDict = new PDFDict();
604 0 : cloneSubElements( pNewDict->m_aSubElements );
605 0 : pNewDict->buildMap();
606 0 : return pNewDict;
607 : }
608 :
609 68 : PDFStream::~PDFStream()
610 : {
611 68 : }
612 :
613 0 : bool PDFStream::emit( EmitContext& rWriteContext ) const
614 : {
615 0 : return rWriteContext.copyOrigBytes( m_nBeginOffset, m_nEndOffset-m_nBeginOffset );
616 : }
617 :
618 0 : PDFEntry* PDFStream::clone() const
619 : {
620 0 : return new PDFStream( m_nBeginOffset, m_nEndOffset, NULL );
621 : }
622 :
623 0 : unsigned int PDFStream::getDictLength( const PDFContainer* pContainer ) const
624 : {
625 0 : if( ! m_pDict )
626 0 : return 0;
627 : // find /Length entry, can either be a direct or indirect number object
628 : std::unordered_map<OString,PDFEntry*,OStringHash>::const_iterator it =
629 0 : m_pDict->m_aMap.find( "Length" );
630 0 : if( it == m_pDict->m_aMap.end() )
631 0 : return 0;
632 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(it->second);
633 0 : if( ! pNum && pContainer )
634 : {
635 0 : PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(it->second);
636 0 : if( pRef )
637 : {
638 0 : int nEle = pContainer->m_aSubElements.size();
639 0 : for( int i = 0; i < nEle && ! pNum; i++ )
640 : {
641 0 : PDFObject* pObj = dynamic_cast<PDFObject*>(pContainer->m_aSubElements[i]);
642 0 : if( pObj &&
643 0 : pObj->m_nNumber == pRef->m_nNumber &&
644 0 : pObj->m_nGeneration == pRef->m_nGeneration )
645 : {
646 0 : if( pObj->m_pObject )
647 0 : pNum = dynamic_cast<PDFNumber*>(pObj->m_pObject);
648 0 : break;
649 : }
650 : }
651 : }
652 : }
653 0 : return pNum ? static_cast<unsigned int>(pNum->m_fValue) : 0;
654 : }
655 :
656 214 : PDFObject::~PDFObject()
657 : {
658 214 : }
659 :
660 0 : bool PDFObject::getDeflatedStream( char** ppStream, unsigned int* pBytes, const PDFContainer* pObjectContainer, EmitContext& rContext ) const
661 : {
662 0 : bool bIsDeflated = false;
663 0 : if( m_pStream && m_pStream->m_pDict &&
664 0 : m_pStream->m_nEndOffset > m_pStream->m_nBeginOffset+15
665 : )
666 : {
667 0 : unsigned int nOuterStreamLen = m_pStream->m_nEndOffset - m_pStream->m_nBeginOffset;
668 0 : *ppStream = static_cast<char*>(rtl_allocateMemory( nOuterStreamLen ));
669 0 : unsigned int nRead = rContext.readOrigBytes( m_pStream->m_nBeginOffset, nOuterStreamLen, *ppStream );
670 0 : if( nRead != nOuterStreamLen )
671 : {
672 0 : rtl_freeMemory( *ppStream );
673 0 : *ppStream = NULL;
674 0 : *pBytes = 0;
675 0 : return false;
676 : }
677 : // is there a filter entry ?
678 : std::unordered_map<OString,PDFEntry*,OStringHash>::const_iterator it =
679 0 : m_pStream->m_pDict->m_aMap.find( "Filter" );
680 0 : if( it != m_pStream->m_pDict->m_aMap.end() )
681 : {
682 0 : PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
683 0 : if( ! pFilter )
684 : {
685 0 : PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
686 0 : if( pArray && ! pArray->m_aSubElements.empty() )
687 : {
688 0 : pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front());
689 : }
690 : }
691 :
692 : // is the (first) filter FlateDecode ?
693 0 : if (pFilter && pFilter->m_aName == "FlateDecode")
694 : {
695 0 : bIsDeflated = true;
696 : }
697 : }
698 : // prepare compressed data section
699 0 : char* pStream = *ppStream;
700 0 : if( pStream[0] == 's' )
701 0 : pStream += 6; // skip "stream"
702 : // skip line end after "stream"
703 0 : while( *pStream == '\r' || *pStream == '\n' )
704 0 : pStream++;
705 : // get the compressed length
706 0 : *pBytes = m_pStream->getDictLength( pObjectContainer );
707 0 : if( pStream != *ppStream )
708 0 : memmove( *ppStream, pStream, *pBytes );
709 0 : if( rContext.m_bDecrypt )
710 : {
711 0 : EmitImplData* pEData = getEmitData( rContext );
712 : pEData->decrypt( reinterpret_cast<const sal_uInt8*>(*ppStream),
713 : *pBytes,
714 : reinterpret_cast<sal_uInt8*>(*ppStream),
715 : m_nNumber,
716 : m_nGeneration
717 0 : ); // decrypt inplace
718 0 : }
719 : }
720 : else
721 0 : *ppStream = NULL, *pBytes = 0;
722 0 : return bIsDeflated;
723 : }
724 :
725 0 : static void unzipToBuffer( char* pBegin, unsigned int nLen,
726 : sal_uInt8** pOutBuf, sal_uInt32* pOutLen )
727 : {
728 : z_stream aZStr;
729 0 : aZStr.next_in = reinterpret_cast<Bytef *>(pBegin);
730 0 : aZStr.avail_in = nLen;
731 0 : aZStr.zalloc = nullptr;
732 0 : aZStr.zfree = nullptr;
733 0 : aZStr.opaque = nullptr;
734 :
735 0 : int err = inflateInit(&aZStr);
736 :
737 0 : const unsigned int buf_increment_size = 16384;
738 :
739 0 : *pOutBuf = static_cast<sal_uInt8*>(rtl_reallocateMemory( *pOutBuf, buf_increment_size ));
740 0 : aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf);
741 0 : aZStr.avail_out = buf_increment_size;
742 0 : *pOutLen = buf_increment_size;
743 0 : while( err != Z_STREAM_END && err >= Z_OK && aZStr.avail_in )
744 : {
745 0 : err = inflate( &aZStr, Z_NO_FLUSH );
746 0 : if( aZStr.avail_out == 0 )
747 : {
748 0 : if( err != Z_STREAM_END )
749 : {
750 0 : const int nNewAlloc = *pOutLen + buf_increment_size;
751 0 : *pOutBuf = static_cast<sal_uInt8*>(rtl_reallocateMemory( *pOutBuf, nNewAlloc ));
752 0 : aZStr.next_out = reinterpret_cast<Bytef*>(*pOutBuf + *pOutLen);
753 0 : aZStr.avail_out = buf_increment_size;
754 0 : *pOutLen = nNewAlloc;
755 : }
756 : }
757 : }
758 0 : if( err == Z_STREAM_END )
759 : {
760 0 : if( aZStr.avail_out > 0 )
761 0 : *pOutLen -= aZStr.avail_out;
762 : }
763 0 : inflateEnd(&aZStr);
764 0 : if( err < Z_OK )
765 : {
766 0 : rtl_freeMemory( *pOutBuf );
767 0 : *pOutBuf = NULL;
768 0 : *pOutLen = 0;
769 : }
770 0 : }
771 :
772 0 : bool PDFObject::writeStream( EmitContext& rWriteContext, const PDFFile* pParsedFile ) const
773 : {
774 0 : bool bSuccess = false;
775 0 : if( m_pStream )
776 : {
777 0 : char* pStream = NULL;
778 0 : unsigned int nBytes = 0;
779 0 : if( getDeflatedStream( &pStream, &nBytes, pParsedFile, rWriteContext ) && nBytes && rWriteContext.m_bDeflate )
780 : {
781 0 : sal_uInt8* pOutBytes = NULL;
782 0 : sal_uInt32 nOutBytes = 0;
783 0 : unzipToBuffer( pStream, nBytes, &pOutBytes, &nOutBytes );
784 0 : rWriteContext.write( pOutBytes, nOutBytes );
785 0 : rtl_freeMemory( pOutBytes );
786 : }
787 0 : else if( pStream && nBytes )
788 0 : rWriteContext.write( pStream, nBytes );
789 0 : rtl_freeMemory( pStream );
790 : }
791 0 : return bSuccess;
792 : }
793 :
794 0 : bool PDFObject::emit( EmitContext& rWriteContext ) const
795 : {
796 0 : if( ! rWriteContext.write( "\n", 1 ) )
797 0 : return false;
798 :
799 0 : EmitImplData* pEData = getEmitData( rWriteContext );
800 0 : if( pEData )
801 0 : pEData->insertXref( m_nNumber, m_nGeneration, rWriteContext.getCurPos() );
802 :
803 0 : OStringBuffer aBuf( 32 );
804 0 : aBuf.append( sal_Int32( m_nNumber ) );
805 0 : aBuf.append( ' ' );
806 0 : aBuf.append( sal_Int32( m_nGeneration ) );
807 0 : aBuf.append( " obj\n" );
808 0 : if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
809 0 : return false;
810 :
811 0 : if( pEData )
812 0 : pEData->setDecryptObject( m_nNumber, m_nGeneration );
813 0 : if( (rWriteContext.m_bDeflate || rWriteContext.m_bDecrypt) && pEData )
814 : {
815 0 : char* pStream = NULL;
816 0 : unsigned int nBytes = 0;
817 0 : bool bDeflate = getDeflatedStream( &pStream, &nBytes, pEData->m_pObjectContainer, rWriteContext );
818 0 : if( pStream && nBytes )
819 : {
820 : // unzip the stream
821 0 : sal_uInt8* pOutBytes = NULL;
822 0 : sal_uInt32 nOutBytes = 0;
823 0 : if( bDeflate && rWriteContext.m_bDeflate )
824 0 : unzipToBuffer( pStream, nBytes, &pOutBytes, &nOutBytes );
825 : else
826 : {
827 : // nothing to deflate, but decryption has happened
828 0 : pOutBytes = reinterpret_cast<sal_uInt8*>(pStream);
829 0 : nOutBytes = (sal_uInt32)nBytes;
830 : }
831 :
832 0 : if( nOutBytes )
833 : {
834 : // clone this object
835 0 : PDFObject* pClone = static_cast<PDFObject*>(clone());
836 : // set length in the dictionary to new stream length
837 0 : PDFNumber* pNewLen = new PDFNumber( double(nOutBytes) );
838 0 : pClone->m_pStream->m_pDict->insertValue( "Length", pNewLen );
839 :
840 0 : if( bDeflate && rWriteContext.m_bDeflate )
841 : {
842 : // delete flatedecode filter
843 : std::unordered_map<OString,PDFEntry*,OStringHash>::const_iterator it =
844 0 : pClone->m_pStream->m_pDict->m_aMap.find( "Filter" );
845 0 : if( it != pClone->m_pStream->m_pDict->m_aMap.end() )
846 : {
847 0 : PDFName* pFilter = dynamic_cast<PDFName*>(it->second);
848 0 : if (pFilter && pFilter->m_aName == "FlateDecode")
849 0 : pClone->m_pStream->m_pDict->eraseValue( "Filter" );
850 : else
851 : {
852 0 : PDFArray* pArray = dynamic_cast<PDFArray*>(it->second);
853 0 : if( pArray && ! pArray->m_aSubElements.empty() )
854 : {
855 0 : pFilter = dynamic_cast<PDFName*>(pArray->m_aSubElements.front());
856 0 : if (pFilter && pFilter->m_aName == "FlateDecode")
857 : {
858 0 : delete pFilter;
859 0 : pArray->m_aSubElements.erase( pArray->m_aSubElements.begin() );
860 : }
861 : }
862 : }
863 : }
864 : }
865 :
866 : // write sub elements except stream
867 0 : bool bRet = true;
868 0 : unsigned int nEle = pClone->m_aSubElements.size();
869 0 : for( unsigned int i = 0; i < nEle && bRet; i++ )
870 : {
871 0 : if( pClone->m_aSubElements[i] != pClone->m_pStream )
872 0 : bRet = pClone->m_aSubElements[i]->emit( rWriteContext );
873 : }
874 0 : delete pClone;
875 : // write stream
876 0 : if( bRet )
877 0 : rWriteContext.write( "stream\n", 7 );
878 0 : if( bRet )
879 0 : bRet = rWriteContext.write( pOutBytes, nOutBytes );
880 0 : if( bRet )
881 0 : bRet = rWriteContext.write( "\nendstream\nendobj\n", 18 );
882 0 : if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream) )
883 0 : rtl_freeMemory( pOutBytes );
884 0 : rtl_freeMemory( pStream );
885 0 : if( pEData )
886 0 : pEData->setDecryptObject( 0, 0 );
887 0 : return bRet;
888 : }
889 0 : if( pOutBytes != reinterpret_cast<sal_uInt8*>(pStream) )
890 0 : rtl_freeMemory( pOutBytes );
891 : }
892 0 : rtl_freeMemory( pStream );
893 : }
894 :
895 0 : bool bRet = emitSubElements( rWriteContext ) &&
896 0 : rWriteContext.write( "\nendobj\n", 8 );
897 0 : if( pEData )
898 0 : pEData->setDecryptObject( 0, 0 );
899 0 : return bRet;
900 : }
901 :
902 0 : PDFEntry* PDFObject::clone() const
903 : {
904 0 : PDFObject* pNewOb = new PDFObject( m_nNumber, m_nGeneration );
905 0 : cloneSubElements( pNewOb->m_aSubElements );
906 0 : unsigned int nEle = m_aSubElements.size();
907 0 : for( unsigned int i = 0; i < nEle; i++ )
908 : {
909 0 : if( m_aSubElements[i] == m_pObject )
910 0 : pNewOb->m_pObject = pNewOb->m_aSubElements[i];
911 0 : else if( m_aSubElements[i] == m_pStream && pNewOb->m_pObject )
912 : {
913 0 : pNewOb->m_pStream = dynamic_cast<PDFStream*>(pNewOb->m_aSubElements[i]);
914 0 : PDFDict* pNewDict = dynamic_cast<PDFDict*>(pNewOb->m_pObject);
915 0 : if (pNewDict && pNewOb->m_pStream)
916 0 : pNewOb->m_pStream->m_pDict = pNewDict;
917 : }
918 : }
919 0 : return pNewOb;
920 : }
921 :
922 10 : PDFTrailer::~PDFTrailer()
923 : {
924 10 : }
925 :
926 0 : bool PDFTrailer::emit( EmitContext& rWriteContext ) const
927 : {
928 : // get xref offset
929 0 : unsigned int nXRefPos = rWriteContext.getCurPos();
930 : // begin xref section, object 0 is always free
931 0 : if( ! rWriteContext.write( "xref\r\n"
932 : "0 1\r\n"
933 0 : "0000000000 65535 f\r\n", 31 ) )
934 0 : return false;
935 : // check if we are emitting a complete PDF file
936 0 : EmitImplData* pEData = getEmitData( rWriteContext );
937 0 : if( pEData )
938 : {
939 : // emit object xrefs
940 0 : const EmitImplData::XRefTable& rXRefs = pEData->m_aXRefTable;
941 0 : EmitImplData::XRefTable::const_iterator section_begin, section_end;
942 0 : section_begin = rXRefs.begin();
943 0 : while( section_begin != rXRefs.end() )
944 : {
945 : // find end of continuous object numbers
946 0 : section_end = section_begin;
947 0 : unsigned int nLast = section_begin->first;
948 0 : while( (++section_end) != rXRefs.end() &&
949 0 : section_end->first == nLast+1 )
950 0 : nLast = section_end->first;
951 : // write first object number and number of following entries
952 0 : OStringBuffer aBuf( 21 );
953 0 : aBuf.append( sal_Int32( section_begin->first ) );
954 0 : aBuf.append( ' ' );
955 0 : aBuf.append( sal_Int32(nLast - section_begin->first + 1) );
956 0 : aBuf.append( "\r\n" );
957 0 : if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
958 0 : return false;
959 0 : while( section_begin != section_end )
960 : {
961 : // write 20 char entry of form
962 : // 0000offset 00gen n\r\n
963 0 : aBuf.setLength( 0 );
964 0 : OString aOffset( OString::number( section_begin->second.second ) );
965 0 : int nPad = 10 - aOffset.getLength();
966 0 : for( int i = 0; i < nPad; i++ )
967 0 : aBuf.append( '0' );
968 0 : aBuf.append( aOffset );
969 0 : aBuf.append( ' ' );
970 0 : OString aGeneration( OString::number( section_begin->second.first ) );
971 0 : nPad = 5 - aGeneration.getLength();
972 0 : for( int i = 0; i < nPad; i++ )
973 0 : aBuf.append( '0' );
974 0 : aBuf.append( aGeneration );
975 0 : aBuf.append( " n\r\n" );
976 0 : if( ! rWriteContext.write( aBuf.getStr(), 20 ) )
977 0 : return false;
978 0 : ++section_begin;
979 0 : }
980 0 : }
981 : }
982 0 : if( ! rWriteContext.write( "trailer\n", 8 ) )
983 0 : return false;
984 0 : if( ! emitSubElements( rWriteContext ) )
985 0 : return false;
986 0 : if( ! rWriteContext.write( "startxref\n", 10 ) )
987 0 : return false;
988 0 : OString aOffset( OString::number( nXRefPos ) );
989 0 : if( ! rWriteContext.write( aOffset.getStr(), aOffset.getLength() ) )
990 0 : return false;
991 0 : return rWriteContext.write( "\n%%EOF\n", 7 );
992 : }
993 :
994 0 : PDFEntry* PDFTrailer::clone() const
995 : {
996 0 : PDFTrailer* pNewTr = new PDFTrailer();
997 0 : cloneSubElements( pNewTr->m_aSubElements );
998 0 : unsigned int nEle = m_aSubElements.size();
999 0 : for( unsigned int i = 0; i < nEle; i++ )
1000 : {
1001 0 : if( m_aSubElements[i] == m_pDict )
1002 : {
1003 0 : pNewTr->m_pDict = dynamic_cast<PDFDict*>(pNewTr->m_aSubElements[i]);
1004 0 : break;
1005 : }
1006 : }
1007 0 : return pNewTr;
1008 : }
1009 :
1010 : #define ENCRYPTION_KEY_LEN 16
1011 : #define ENCRYPTION_BUF_LEN 32
1012 :
1013 : namespace pdfparse {
1014 : struct PDFFileImplData
1015 : {
1016 : bool m_bIsEncrypted;
1017 : bool m_bStandardHandler;
1018 : sal_uInt32 m_nAlgoVersion;
1019 : sal_uInt32 m_nStandardRevision;
1020 : sal_uInt32 m_nKeyLength;
1021 : sal_uInt8 m_aOEntry[32];
1022 : sal_uInt8 m_aUEntry[32];
1023 : sal_uInt32 m_nPEntry;
1024 : OString m_aDocID;
1025 : rtlCipher m_aCipher;
1026 : rtlDigest m_aDigest;
1027 :
1028 : sal_uInt8 m_aDecryptionKey[ENCRYPTION_KEY_LEN+5]; // maximum handled key length
1029 :
1030 5 : PDFFileImplData() :
1031 : m_bIsEncrypted( false ),
1032 : m_bStandardHandler( false ),
1033 : m_nAlgoVersion( 0 ),
1034 : m_nStandardRevision( 0 ),
1035 : m_nKeyLength( 0 ),
1036 : m_nPEntry( 0 ),
1037 : m_aCipher( NULL ),
1038 5 : m_aDigest( NULL )
1039 : {
1040 5 : memset( m_aOEntry, 0, sizeof( m_aOEntry ) );
1041 5 : memset( m_aUEntry, 0, sizeof( m_aUEntry ) );
1042 5 : memset( m_aDecryptionKey, 0, sizeof( m_aDecryptionKey ) );
1043 5 : }
1044 :
1045 5 : ~PDFFileImplData()
1046 5 : {
1047 5 : if( m_aCipher )
1048 0 : rtl_cipher_destroyARCFOUR( m_aCipher );
1049 5 : if( m_aDigest )
1050 0 : rtl_digest_destroyMD5( m_aDigest );
1051 5 : }
1052 : };
1053 : }
1054 :
1055 15 : PDFFile::~PDFFile()
1056 : {
1057 5 : if( m_pData )
1058 5 : delete m_pData;
1059 10 : }
1060 :
1061 5 : bool PDFFile::isEncrypted() const
1062 : {
1063 5 : return impl_getData()->m_bIsEncrypted;
1064 : }
1065 :
1066 0 : bool PDFFile::decrypt( const sal_uInt8* pInBuffer, sal_uInt32 nLen, sal_uInt8* pOutBuffer,
1067 : unsigned int nObject, unsigned int nGeneration ) const
1068 : {
1069 0 : if( ! isEncrypted() )
1070 0 : return false;
1071 :
1072 0 : if( ! m_pData->m_aCipher )
1073 0 : m_pData->m_aCipher = rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream );
1074 :
1075 : // modify encryption key
1076 0 : sal_uInt32 i = m_pData->m_nKeyLength;
1077 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8(nObject&0xff);
1078 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>8)&0xff);
1079 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8((nObject>>16)&0xff);
1080 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8(nGeneration&0xff);
1081 0 : m_pData->m_aDecryptionKey[i++] = sal_uInt8((nGeneration>>8)&0xff);
1082 :
1083 : sal_uInt8 aSum[ENCRYPTION_KEY_LEN];
1084 0 : rtl_digest_updateMD5( m_pData->m_aDigest, m_pData->m_aDecryptionKey, i );
1085 0 : rtl_digest_getMD5( m_pData->m_aDigest, aSum, sizeof( aSum ) );
1086 :
1087 0 : if( i > 16 )
1088 0 : i = 16;
1089 :
1090 : rtlCipherError aErr = rtl_cipher_initARCFOUR( m_pData->m_aCipher,
1091 : rtl_Cipher_DirectionDecode,
1092 : aSum, i,
1093 0 : NULL, 0 );
1094 0 : if( aErr == rtl_Cipher_E_None )
1095 : aErr = rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1096 : pInBuffer, nLen,
1097 0 : pOutBuffer, nLen );
1098 0 : return aErr == rtl_Cipher_E_None;
1099 : }
1100 :
1101 : static const sal_uInt8 nPadString[32] =
1102 : {
1103 : 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1104 : 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
1105 : };
1106 :
1107 0 : static void pad_or_truncate_to_32( const OString& rStr, sal_Char* pBuffer )
1108 : {
1109 0 : int nLen = rStr.getLength();
1110 0 : if( nLen > 32 )
1111 0 : nLen = 32;
1112 0 : const sal_Char* pStr = rStr.getStr();
1113 0 : memcpy( pBuffer, pStr, nLen );
1114 0 : int i = 0;
1115 0 : while( nLen < 32 )
1116 0 : pBuffer[nLen++] = nPadString[i++];
1117 0 : }
1118 :
1119 : // pass at least pData->m_nKeyLength bytes in
1120 0 : static sal_uInt32 password_to_key( const OString& rPwd, sal_uInt8* pOutKey, PDFFileImplData* pData, bool bComputeO )
1121 : {
1122 : // see PDF reference 1.4 Algorithm 3.2
1123 : // encrypt pad string
1124 : sal_Char aPadPwd[ENCRYPTION_BUF_LEN];
1125 0 : pad_or_truncate_to_32( rPwd, aPadPwd );
1126 0 : rtl_digest_updateMD5( pData->m_aDigest, aPadPwd, sizeof( aPadPwd ) );
1127 0 : if( ! bComputeO )
1128 : {
1129 0 : rtl_digest_updateMD5( pData->m_aDigest, pData->m_aOEntry, 32 );
1130 : sal_uInt8 aPEntry[4];
1131 0 : aPEntry[0] = static_cast<sal_uInt8>(pData->m_nPEntry & 0xff);
1132 0 : aPEntry[1] = static_cast<sal_uInt8>((pData->m_nPEntry >> 8 ) & 0xff);
1133 0 : aPEntry[2] = static_cast<sal_uInt8>((pData->m_nPEntry >> 16) & 0xff);
1134 0 : aPEntry[3] = static_cast<sal_uInt8>((pData->m_nPEntry >> 24) & 0xff);
1135 0 : rtl_digest_updateMD5( pData->m_aDigest, aPEntry, sizeof(aPEntry) );
1136 0 : rtl_digest_updateMD5( pData->m_aDigest, pData->m_aDocID.getStr(), pData->m_aDocID.getLength() );
1137 : }
1138 : sal_uInt8 nSum[RTL_DIGEST_LENGTH_MD5];
1139 0 : rtl_digest_getMD5( pData->m_aDigest, nSum, sizeof(nSum) );
1140 0 : if( pData->m_nStandardRevision == 3 )
1141 : {
1142 0 : for( int i = 0; i < 50; i++ )
1143 : {
1144 0 : rtl_digest_updateMD5( pData->m_aDigest, nSum, sizeof(nSum) );
1145 0 : rtl_digest_getMD5( pData->m_aDigest, nSum, sizeof(nSum) );
1146 : }
1147 : }
1148 0 : sal_uInt32 nLen = pData->m_nKeyLength;
1149 0 : if( nLen > RTL_DIGEST_LENGTH_MD5 )
1150 0 : nLen = RTL_DIGEST_LENGTH_MD5;
1151 0 : memcpy( pOutKey, nSum, nLen );
1152 0 : return nLen;
1153 : }
1154 :
1155 0 : static bool check_user_password( const OString& rPwd, PDFFileImplData* pData )
1156 : {
1157 : // see PDF reference 1.4 Algorithm 3.6
1158 0 : bool bValid = false;
1159 : sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1160 : sal_uInt8 nEncryptedEntry[ENCRYPTION_BUF_LEN];
1161 0 : memset( nEncryptedEntry, 0, sizeof(nEncryptedEntry) );
1162 0 : sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, pData, false );
1163 : // save (at this time potential) decryption key for later use
1164 0 : memcpy( pData->m_aDecryptionKey, aKey, nKeyLen );
1165 0 : if( pData->m_nStandardRevision == 2 )
1166 : {
1167 : // see PDF reference 1.4 Algorithm 3.4
1168 : // encrypt pad string
1169 : rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1170 : aKey, nKeyLen,
1171 0 : NULL, 0 );
1172 : rtl_cipher_encodeARCFOUR( pData->m_aCipher, nPadString, sizeof( nPadString ),
1173 0 : nEncryptedEntry, sizeof( nEncryptedEntry ) );
1174 0 : bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 32 ) == 0);
1175 : }
1176 0 : else if( pData->m_nStandardRevision == 3 )
1177 : {
1178 : // see PDF reference 1.4 Algorithm 3.5
1179 0 : rtl_digest_updateMD5( pData->m_aDigest, nPadString, sizeof( nPadString ) );
1180 0 : rtl_digest_updateMD5( pData->m_aDigest, pData->m_aDocID.getStr(), pData->m_aDocID.getLength() );
1181 0 : rtl_digest_getMD5( pData->m_aDigest, nEncryptedEntry, sizeof(nEncryptedEntry) );
1182 : rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1183 0 : aKey, sizeof(aKey), NULL, 0 );
1184 : rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1185 : nEncryptedEntry, 16,
1186 0 : nEncryptedEntry, 16 ); // encrypt in place
1187 0 : for( int i = 1; i <= 19; i++ ) // do it 19 times, start with 1
1188 : {
1189 : sal_uInt8 aTempKey[ENCRYPTION_KEY_LEN];
1190 0 : for( sal_uInt32 j = 0; j < sizeof(aTempKey); j++ )
1191 0 : aTempKey[j] = static_cast<sal_uInt8>( aKey[j] ^ i );
1192 :
1193 : rtl_cipher_initARCFOUR( pData->m_aCipher, rtl_Cipher_DirectionEncode,
1194 0 : aTempKey, sizeof(aTempKey), NULL, 0 );
1195 : rtl_cipher_encodeARCFOUR( pData->m_aCipher,
1196 : nEncryptedEntry, 16,
1197 0 : nEncryptedEntry, 16 ); // encrypt in place
1198 : }
1199 0 : bValid = (memcmp( nEncryptedEntry, pData->m_aUEntry, 16 ) == 0);
1200 : }
1201 0 : return bValid;
1202 : }
1203 :
1204 0 : bool PDFFile::usesSupportedEncryptionFormat() const
1205 : {
1206 0 : return m_pData->m_bStandardHandler &&
1207 0 : m_pData->m_nAlgoVersion >= 1 &&
1208 0 : m_pData->m_nAlgoVersion <= 2 &&
1209 0 : m_pData->m_nStandardRevision >= 2 &&
1210 0 : m_pData->m_nStandardRevision <= 3;
1211 : }
1212 :
1213 0 : bool PDFFile::setupDecryptionData( const OString& rPwd ) const
1214 : {
1215 0 : if( !impl_getData()->m_bIsEncrypted )
1216 0 : return rPwd.isEmpty();
1217 :
1218 : // check if we can handle this encryption at all
1219 0 : if( ! usesSupportedEncryptionFormat() )
1220 0 : return false;
1221 :
1222 0 : if( ! m_pData->m_aCipher )
1223 0 : m_pData->m_aCipher = rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream);
1224 0 : if( ! m_pData->m_aDigest )
1225 0 : m_pData->m_aDigest = rtl_digest_createMD5();
1226 :
1227 : // first try user password
1228 0 : bool bValid = check_user_password( rPwd, m_pData );
1229 :
1230 0 : if( ! bValid )
1231 : {
1232 : // try owner password
1233 : // see PDF reference 1.4 Algorithm 3.7
1234 : sal_uInt8 aKey[ENCRYPTION_KEY_LEN];
1235 : sal_uInt8 nPwd[ENCRYPTION_BUF_LEN];
1236 0 : memset( nPwd, 0, sizeof(nPwd) );
1237 0 : sal_uInt32 nKeyLen = password_to_key( rPwd, aKey, m_pData, true );
1238 0 : if( m_pData->m_nStandardRevision == 2 )
1239 : {
1240 : rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1241 0 : aKey, nKeyLen, NULL, 0 );
1242 : rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1243 : m_pData->m_aOEntry, 32,
1244 0 : nPwd, 32 );
1245 : }
1246 0 : else if( m_pData->m_nStandardRevision == 3 )
1247 : {
1248 0 : memcpy( nPwd, m_pData->m_aOEntry, 32 );
1249 0 : for( int i = 19; i >= 0; i-- )
1250 : {
1251 : sal_uInt8 nTempKey[ENCRYPTION_KEY_LEN];
1252 0 : for( unsigned int j = 0; j < sizeof(nTempKey); j++ )
1253 0 : nTempKey[j] = sal_uInt8(aKey[j] ^ i);
1254 : rtl_cipher_initARCFOUR( m_pData->m_aCipher, rtl_Cipher_DirectionDecode,
1255 0 : nTempKey, nKeyLen, NULL, 0 );
1256 : rtl_cipher_decodeARCFOUR( m_pData->m_aCipher,
1257 : nPwd, 32,
1258 0 : nPwd, 32 ); // decrypt inplace
1259 : }
1260 : }
1261 0 : bValid = check_user_password( OString( reinterpret_cast<char*>(nPwd), 32 ), m_pData );
1262 : }
1263 :
1264 0 : return bValid;
1265 : }
1266 :
1267 0 : OUString PDFFile::getDecryptionKey() const
1268 : {
1269 0 : OUStringBuffer aBuf( ENCRYPTION_KEY_LEN * 2 );
1270 0 : if( impl_getData()->m_bIsEncrypted )
1271 : {
1272 0 : for( sal_uInt32 i = 0; i < m_pData->m_nKeyLength; i++ )
1273 : {
1274 : static const sal_Unicode pHexTab[16] = { '0', '1', '2', '3', '4', '5', '6', '7',
1275 : '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
1276 0 : aBuf.append( pHexTab[(m_pData->m_aDecryptionKey[i] >> 4) & 0x0f] );
1277 0 : aBuf.append( pHexTab[(m_pData->m_aDecryptionKey[i] & 0x0f)] );
1278 : }
1279 :
1280 : }
1281 0 : return aBuf.makeStringAndClear();
1282 : }
1283 :
1284 5 : PDFFileImplData* PDFFile::impl_getData() const
1285 : {
1286 5 : if( m_pData )
1287 0 : return m_pData;
1288 5 : m_pData = new PDFFileImplData();
1289 : // check for encryption dict in a trailer
1290 5 : unsigned int nElements = m_aSubElements.size();
1291 127 : while( nElements-- > 0 )
1292 : {
1293 117 : PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(m_aSubElements[nElements]);
1294 117 : if( pTrailer && pTrailer->m_pDict )
1295 : {
1296 : // search doc id
1297 5 : PDFDict::Map::iterator doc_id = pTrailer->m_pDict->m_aMap.find( "ID" );
1298 5 : if( doc_id != pTrailer->m_pDict->m_aMap.end() )
1299 : {
1300 5 : PDFArray* pArr = dynamic_cast<PDFArray*>(doc_id->second);
1301 5 : if( pArr && pArr->m_aSubElements.size() > 0 )
1302 : {
1303 5 : PDFString* pStr = dynamic_cast<PDFString*>(pArr->m_aSubElements[0]);
1304 5 : if( pStr )
1305 5 : m_pData->m_aDocID = pStr->getFilteredString();
1306 : #if OSL_DEBUG_LEVEL > 1
1307 : OUString aTmp;
1308 : for( int i = 0; i < m_pData->m_aDocID.getLength(); i++ )
1309 : aTmp += OUString::number((unsigned int)sal_uInt8(m_pData->m_aDocID[i]), 16);
1310 : SAL_INFO("sdext.pdfimport.pdfparse", "DocId is <" << OUStringToOString(aTmp, RTL_TEXTENCODING_UTF8).getStr() << ">");
1311 : #endif
1312 : }
1313 : }
1314 : // search Encrypt entry
1315 : PDFDict::Map::iterator enc =
1316 5 : pTrailer->m_pDict->m_aMap.find( "Encrypt" );
1317 5 : if( enc != pTrailer->m_pDict->m_aMap.end() )
1318 : {
1319 0 : PDFDict* pDict = dynamic_cast<PDFDict*>(enc->second);
1320 0 : if( ! pDict )
1321 : {
1322 0 : PDFObjectRef* pRef = dynamic_cast<PDFObjectRef*>(enc->second);
1323 0 : if( pRef )
1324 : {
1325 0 : PDFObject* pObj = findObject( pRef );
1326 0 : if( pObj && pObj->m_pObject )
1327 0 : pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
1328 : }
1329 : }
1330 0 : if( pDict )
1331 : {
1332 0 : PDFDict::Map::iterator filter = pDict->m_aMap.find( "Filter" );
1333 0 : PDFDict::Map::iterator version = pDict->m_aMap.find( "V" );
1334 0 : PDFDict::Map::iterator len = pDict->m_aMap.find( "Length" );
1335 0 : PDFDict::Map::iterator o_ent = pDict->m_aMap.find( "O" );
1336 0 : PDFDict::Map::iterator u_ent = pDict->m_aMap.find( "U" );
1337 0 : PDFDict::Map::iterator r_ent = pDict->m_aMap.find( "R" );
1338 0 : PDFDict::Map::iterator p_ent = pDict->m_aMap.find( "P" );
1339 0 : if( filter != pDict->m_aMap.end() )
1340 : {
1341 0 : m_pData->m_bIsEncrypted = true;
1342 0 : m_pData->m_nKeyLength = 5;
1343 0 : if( version != pDict->m_aMap.end() )
1344 : {
1345 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(version->second);
1346 0 : if( pNum )
1347 0 : m_pData->m_nAlgoVersion = static_cast<sal_uInt32>(pNum->m_fValue);
1348 : }
1349 0 : if( m_pData->m_nAlgoVersion >= 3 )
1350 0 : m_pData->m_nKeyLength = 16;
1351 0 : if( len != pDict->m_aMap.end() )
1352 : {
1353 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(len->second);
1354 0 : if( pNum )
1355 0 : m_pData->m_nKeyLength = static_cast<sal_uInt32>(pNum->m_fValue) / 8;
1356 : }
1357 0 : PDFName* pFilter = dynamic_cast<PDFName*>(filter->second);
1358 0 : if( pFilter && pFilter->getFilteredName() == "Standard" )
1359 0 : m_pData->m_bStandardHandler = true;
1360 0 : if( o_ent != pDict->m_aMap.end() )
1361 : {
1362 0 : PDFString* pString = dynamic_cast<PDFString*>(o_ent->second);
1363 0 : if( pString )
1364 : {
1365 0 : OString aEnt = pString->getFilteredString();
1366 0 : if( aEnt.getLength() == 32 )
1367 0 : memcpy( m_pData->m_aOEntry, aEnt.getStr(), 32 );
1368 : #if OSL_DEBUG_LEVEL > 1
1369 : else
1370 : {
1371 : OUString aTmp;
1372 : for( int i = 0; i < aEnt.getLength(); i++ )
1373 : aTmp += " " + OUString::number((unsigned int)sal_uInt8(aEnt[i]), 16);
1374 : SAL_WARN("sdext.pdfimport.pdfparse",
1375 : "O entry has length " << (int)aEnt.getLength() << ", should be 32 <" << OUStringToOString(aTmp, RTL_TEXTENCODING_UTF8).getStr() << ">" );
1376 : }
1377 : #endif
1378 : }
1379 : }
1380 0 : if( u_ent != pDict->m_aMap.end() )
1381 : {
1382 0 : PDFString* pString = dynamic_cast<PDFString*>(u_ent->second);
1383 0 : if( pString )
1384 : {
1385 0 : OString aEnt = pString->getFilteredString();
1386 0 : if( aEnt.getLength() == 32 )
1387 0 : memcpy( m_pData->m_aUEntry, aEnt.getStr(), 32 );
1388 : #if OSL_DEBUG_LEVEL > 1
1389 : else
1390 : {
1391 : OUString aTmp;
1392 : for( int i = 0; i < aEnt.getLength(); i++ )
1393 : aTmp += " " + OUString::number((unsigned int)sal_uInt8(aEnt[i]), 16);
1394 : SAL_WARN("sdext.pdfimport.pdfparse",
1395 : "U entry has length " << (int)aEnt.getLength() << ", should be 32 <" << OUStringToOString(aTmp, RTL_TEXTENCODING_UTF8).getStr() << ">" );
1396 : }
1397 : #endif
1398 : }
1399 : }
1400 0 : if( r_ent != pDict->m_aMap.end() )
1401 : {
1402 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(r_ent->second);
1403 0 : if( pNum )
1404 0 : m_pData->m_nStandardRevision = static_cast<sal_uInt32>(pNum->m_fValue);
1405 : }
1406 0 : if( p_ent != pDict->m_aMap.end() )
1407 : {
1408 0 : PDFNumber* pNum = dynamic_cast<PDFNumber*>(p_ent->second);
1409 0 : if( pNum )
1410 0 : m_pData->m_nPEntry = static_cast<sal_uInt32>(static_cast<sal_Int32>(pNum->m_fValue));
1411 : SAL_INFO("sdext.pdfimport.pdfparse", "p entry is " << m_pData->m_nPEntry );
1412 : }
1413 :
1414 : SAL_INFO("sdext.pdfimport.pdfparse", "Encryption dict: sec handler: " << (pFilter ? OUStringToOString( pFilter->getFilteredName(), RTL_TEXTENCODING_UTF8 ).getStr() : "<unknown>") << ", version = " << (int)m_pData->m_nAlgoVersion << ", revision = " << (int)m_pData->m_nStandardRevision << ", key length = " << m_pData->m_nKeyLength );
1415 0 : break;
1416 : }
1417 : }
1418 : }
1419 : }
1420 : }
1421 :
1422 5 : return m_pData;
1423 : }
1424 :
1425 0 : bool PDFFile::emit( EmitContext& rWriteContext ) const
1426 : {
1427 0 : setEmitData( rWriteContext, new EmitImplData( this ) );
1428 :
1429 0 : OStringBuffer aBuf( 32 );
1430 0 : aBuf.append( "%PDF-" );
1431 0 : aBuf.append( sal_Int32( m_nMajor ) );
1432 0 : aBuf.append( '.' );
1433 0 : aBuf.append( sal_Int32( m_nMinor ) );
1434 0 : aBuf.append( "\n" );
1435 0 : if( ! rWriteContext.write( aBuf.getStr(), aBuf.getLength() ) )
1436 0 : return false;
1437 0 : return emitSubElements( rWriteContext );
1438 : }
1439 :
1440 0 : PDFEntry* PDFFile::clone() const
1441 : {
1442 0 : PDFFile* pNewFl = new PDFFile();
1443 0 : pNewFl->m_nMajor = m_nMajor;
1444 0 : pNewFl->m_nMinor = m_nMinor;
1445 0 : cloneSubElements( pNewFl->m_aSubElements );
1446 0 : return pNewFl;
1447 : }
1448 :
1449 0 : PDFPart::~PDFPart()
1450 : {
1451 0 : }
1452 :
1453 0 : bool PDFPart::emit( EmitContext& rWriteContext ) const
1454 : {
1455 0 : return emitSubElements( rWriteContext );
1456 : }
1457 :
1458 0 : PDFEntry* PDFPart::clone() const
1459 : {
1460 0 : PDFPart* pNewPt = new PDFPart();
1461 0 : cloneSubElements( pNewPt->m_aSubElements );
1462 0 : return pNewPt;
1463 : }
1464 :
1465 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|