Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <iodetect.hxx>
21 : #include <boost/scoped_array.hpp>
22 : #include <osl/endian.h>
23 : #include <sot/storage.hxx>
24 : #include <svtools/parhtml.hxx>
25 : #include <tools/urlobj.hxx>
26 : #include <unotools/moduleoptions.hxx>
27 :
28 0 : static bool IsDocShellRegistered()
29 : {
30 0 : return SvtModuleOptions().IsWriter();
31 : }
32 :
33 0 : SwIoDetect aFilterDetect[] =
34 : {
35 : SwIoDetect( FILTER_RTF ),
36 : SwIoDetect( FILTER_BAS ),
37 : SwIoDetect( sWW6 ),
38 : SwIoDetect( FILTER_WW8 ),
39 : SwIoDetect( sRtfWH ),
40 : SwIoDetect( sHTML ),
41 : SwIoDetect( sWW1 ),
42 : SwIoDetect( sWW5 ),
43 : SwIoDetect( FILTER_XML ),
44 : SwIoDetect( FILTER_TEXT_DLG ),
45 : SwIoDetect( FILTER_TEXT )
46 0 : };
47 :
48 0 : OUString SwIoDetect::IsReader(const sal_Char* pHeader, sal_uLong nLen_) const
49 : {
50 : // Filter recognition
51 : struct W1_FIB
52 : {
53 : SVBT16 wIdent; // 0x0 int magic number
54 : SVBT16 nFib; // 0x2 FIB version written
55 : SVBT16 nProduct; // 0x4 product version written by
56 : SVBT16 nlocale; // 0x6 language stamp---localized version;
57 : SVBT16 pnNext; // 0x8
58 : SVBT16 fFlags;
59 :
60 0 : sal_uInt16 nFibGet() { return SVBT16ToShort(nFib); }
61 0 : sal_uInt16 wIdentGet() { return SVBT16ToShort(wIdent); }
62 0 : sal_uInt16 fFlagsGet() { return SVBT16ToShort(fFlags); }
63 : // SVBT16 fComplex :1;// 0004 when 1, file is in complex, fast-saved format.
64 0 : sal_Bool fComplexGet() { return static_cast< sal_Bool >((fFlagsGet() >> 2) & 1); }
65 : };
66 :
67 0 : bool bRet = false;
68 0 : if ( sHTML == sName )
69 0 : bRet = HTMLParser::IsHTMLFormat( pHeader, true, RTL_TEXTENCODING_DONTKNOW );
70 0 : else if ( FILTER_RTF == sName )
71 0 : bRet = 0 == strncmp( "{\\rtf", pHeader, 5 );
72 0 : else if ( sWW5 == sName )
73 : {
74 0 : W1_FIB *pW1Header = (W1_FIB*)pHeader;
75 0 : if (pW1Header->wIdentGet() == 0xA5DC && pW1Header->nFibGet() == 0x65)
76 0 : bRet = true; /*WW5*/
77 0 : else if (pW1Header->wIdentGet() == 0xA5DB && pW1Header->nFibGet() == 0x2D)
78 0 : bRet = true; /*WW2*/
79 : }
80 0 : else if ( sWW1 == sName )
81 : {
82 0 : bRet = (( ((W1_FIB*)pHeader)->wIdentGet() == 0xA59C
83 0 : && ((W1_FIB*)pHeader)->nFibGet() == 0x21)
84 0 : && ((W1_FIB*)pHeader)->fComplexGet() == 0);
85 : }
86 0 : else if ( FILTER_TEXT == sName )
87 0 : bRet = SwIoSystem::IsDetectableText(pHeader, nLen_);
88 0 : else if ( FILTER_TEXT_DLG == sName)
89 0 : bRet = SwIoSystem::IsDetectableText( pHeader, nLen_, 0, 0, 0, true);
90 0 : return bRet ? sName : OUString();
91 : }
92 :
93 0 : const OUString SwIoSystem::GetSubStorageName( const SfxFilter& rFltr )
94 : {
95 : // for StorageFilters also set the SubStorageName
96 0 : const OUString& rUserData = rFltr.GetUserData();
97 0 : if (rUserData == FILTER_XML ||
98 0 : rUserData == FILTER_XMLV ||
99 0 : rUserData == FILTER_XMLVW)
100 0 : return OUString("content.xml");
101 0 : if (rUserData == sWW6 || rUserData == FILTER_WW8)
102 0 : return OUString("WordDocument");
103 0 : return OUString();
104 : }
105 :
106 0 : const SfxFilter* SwIoSystem::GetFilterOfFormat(const OUString& rFmtNm,
107 : const SfxFilterContainer* pCnt)
108 : {
109 0 : SfxFilterContainer aCntSw( OUString(sSWRITER) );
110 0 : SfxFilterContainer aCntSwWeb( OUString(sSWRITERWEB) );
111 0 : const SfxFilterContainer* pFltCnt = pCnt ? pCnt : ( IsDocShellRegistered() ? &aCntSw : &aCntSwWeb );
112 :
113 : do {
114 0 : if( pFltCnt )
115 : {
116 0 : SfxFilterMatcher aMatcher( pFltCnt->GetName() );
117 0 : SfxFilterMatcherIter aIter( aMatcher );
118 0 : const SfxFilter* pFilter = aIter.First();
119 0 : while ( pFilter )
120 : {
121 0 : if( pFilter->GetUserData().equals(rFmtNm) )
122 0 : return pFilter;
123 0 : pFilter = aIter.Next();
124 0 : }
125 : }
126 0 : if( pCnt || pFltCnt == &aCntSwWeb )
127 : break;
128 0 : pFltCnt = &aCntSwWeb;
129 : } while( true );
130 0 : return 0;
131 : }
132 :
133 0 : sal_Bool SwIoSystem::IsValidStgFilter( const com::sun::star::uno::Reference < com::sun::star::embed::XStorage >& rStg, const SfxFilter& rFilter)
134 : {
135 0 : sal_Bool bRet = sal_False;
136 : try
137 : {
138 0 : sal_uLong nStgFmtId = SotStorage::GetFormatID( rStg );
139 0 : bRet = rStg->isStreamElement( OUString("content.xml") );
140 0 : if ( bRet )
141 0 : bRet = ( nStgFmtId && ( rFilter.GetFormat() == nStgFmtId ) );
142 : }
143 0 : catch ( com::sun::star::uno::Exception& )
144 : {
145 : }
146 :
147 0 : return bRet;
148 : }
149 :
150 0 : sal_Bool SwIoSystem::IsValidStgFilter(SotStorage& rStg, const SfxFilter& rFilter)
151 : {
152 0 : sal_uLong nStgFmtId = rStg.GetFormat();
153 : /*#i8409# We cannot trust the clipboard id anymore :-(*/
154 0 : if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
155 0 : nStgFmtId = 0;
156 :
157 0 : sal_Bool bRet = SVSTREAM_OK == rStg.GetError() &&
158 0 : ( !nStgFmtId || rFilter.GetFormat() == nStgFmtId ) &&
159 0 : ( rStg.IsContained( SwIoSystem::GetSubStorageName( rFilter )) );
160 0 : if( bRet )
161 : {
162 : /* Bug 53445 - there are Excel Docs w/o ClipBoardId! */
163 : /* Bug 62703 - and also WinWord Docs w/o ClipBoardId! */
164 0 : if (rFilter.GetUserData() == FILTER_WW8 || rFilter.GetUserData() == sWW6)
165 : {
166 0 : bRet = (rStg.IsContained(OUString("0Table"))
167 0 : || rStg.IsContained(OUString("1Table")))
168 0 : == (rFilter.GetUserData() == FILTER_WW8);
169 0 : if (bRet && !rFilter.IsAllowedAsTemplate())
170 : {
171 : SotStorageStreamRef xRef =
172 : rStg.OpenSotStream(OUString("WordDocument"),
173 0 : STREAM_STD_READ | STREAM_NOCREATE );
174 0 : xRef->Seek(10);
175 : sal_uInt8 nByte;
176 0 : xRef->ReadUChar( nByte );
177 0 : bRet = !(nByte & 1);
178 : }
179 : }
180 : }
181 0 : return bRet;
182 : }
183 :
184 0 : void TerminateBuffer(sal_Char *pBuffer, sal_uLong nBytesRead, sal_uLong nBufferLen)
185 : {
186 : OSL_ENSURE(nBytesRead <= nBufferLen - 2,
187 : "what you read must be less than the max + null termination");
188 : OSL_ENSURE(!(nBufferLen & 0x00000001), "nMaxReadBuf must be an even number");
189 0 : if (nBytesRead <= nBufferLen - 2)
190 : {
191 0 : pBuffer[nBytesRead] = '\0';
192 0 : pBuffer[nBytesRead+1] = '\0';
193 0 : if (nBytesRead & 0x00000001)
194 0 : pBuffer[nBytesRead+2] = '\0';
195 : }
196 0 : }
197 :
198 : // Check if the file fits the corresponding format
199 : // Currently we only support our own filters
200 0 : sal_Bool SwIoSystem::IsFileFilter(SfxMedium& rMedium, const OUString& rFmtName)
201 : {
202 0 : sal_Bool bRet = sal_False;
203 :
204 0 : SfxFilterContainer aCntSw( OUString(sSWRITER) );
205 0 : SfxFilterContainer aCntSwWeb( OUString(sSWRITERWEB) );
206 0 : const SfxFilterContainer& rFltContainer = IsDocShellRegistered() ? aCntSw : aCntSwWeb;
207 :
208 0 : com::sun::star::uno::Reference < com::sun::star::embed::XStorage > xStor;
209 0 : SotStorageRef xStg;
210 0 : if (rMedium.IsStorage())
211 0 : xStor = rMedium.GetStorage();
212 : else
213 : {
214 0 : SvStream* pStream = rMedium.GetInStream();
215 0 : if ( pStream && SotStorage::IsStorageFile(pStream) )
216 0 : xStg = new SotStorage( pStream, false );
217 : }
218 :
219 0 : SfxFilterMatcher aMatcher( rFltContainer.GetName() );
220 0 : SfxFilterMatcherIter aIter( aMatcher );
221 0 : const SfxFilter* pFltr = aIter.First();
222 0 : while ( pFltr )
223 : {
224 0 : const OUString& rUserData = pFltr->GetUserData();
225 0 : if (rUserData.equals(rFmtName))
226 : {
227 0 : if( 'C' == rUserData[0] )
228 : {
229 0 : if ( xStor.is() )
230 0 : bRet = IsValidStgFilter( xStor, *pFltr );
231 0 : else if ( xStg.Is() )
232 0 : bRet = IsValidStgFilter( *xStg, *pFltr );
233 : }
234 0 : else if( !xStg.Is() && !xStor.is() )
235 : {
236 0 : SvStream* pStrm = rMedium.GetInStream();
237 0 : if( pStrm && !pStrm->GetError() )
238 : {
239 : sal_Char aBuffer[4098];
240 0 : const sal_uLong nMaxRead = sizeof(aBuffer) - 2;
241 0 : sal_uLong nBytesRead = pStrm->Read(aBuffer, nMaxRead);
242 0 : pStrm->Seek(STREAM_SEEK_TO_BEGIN);
243 0 : TerminateBuffer(aBuffer, nBytesRead, sizeof(aBuffer));
244 0 : for (sal_uInt16 i = 0; i < MAXFILTER; ++i)
245 : {
246 0 : if (aFilterDetect[i].IsFilter(rFmtName))
247 : {
248 0 : OUString sFilter(aFilterDetect[i].IsReader(aBuffer, nBytesRead));
249 0 : bRet = !sFilter.isEmpty();
250 0 : break;
251 : }
252 : }
253 : }
254 : }
255 : //The same underlying filter can appear multiple times in the
256 : //filter list, e.g. CWW8 filter twice, once for .doc and once for
257 : //.dot. We just care here if its either, not enforce that it's
258 : //both which would be a bit of an odd requirement
259 0 : if (bRet)
260 0 : break;
261 : }
262 :
263 0 : pFltr = aIter.Next();
264 : }
265 :
266 0 : return bRet;
267 : }
268 :
269 : // Check the type of the stream (file) by searching for corresponding set of bytes.
270 : // If no known type is found, return ASCII for now!
271 : // Returns the internal FilterName.
272 : // rPrefFltName is the internal FilterName that was chosen by the user in the Open Dlg.
273 0 : const SfxFilter* SwIoSystem::GetFileFilter(const OUString& rFileName,
274 : const OUString& rPrefFltName, SfxMedium* pMedium)
275 : {
276 0 : SfxFilterContainer aCntSw( OUString(sSWRITER) );
277 0 : SfxFilterContainer aCntSwWeb( OUString(sSWRITERWEB) );
278 0 : const SfxFilterContainer* pFCntnr = IsDocShellRegistered() ? &aCntSw : &aCntSwWeb;
279 :
280 0 : SfxFilterMatcher aMatcher( pFCntnr->GetName() );
281 0 : SfxFilterMatcherIter aIter( aMatcher );
282 0 : const SfxFilter* pFilter = aIter.First();
283 0 : if ( !pFilter )
284 0 : return 0;
285 :
286 0 : if( pMedium ? ( pMedium->IsStorage() || SotStorage::IsStorageFile( pMedium->GetInStream() ) ) : SotStorage::IsStorageFile( rFileName ) )
287 : {
288 : // package storage or OLEStorage based format
289 0 : SotStorageRef xStg;
290 0 : if (!pMedium )
291 : {
292 0 : INetURLObject aObj;
293 0 : aObj.SetSmartProtocol( INET_PROT_FILE );
294 0 : aObj.SetSmartURL( rFileName );
295 0 : pMedium = new SfxMedium( aObj.GetMainURL( INetURLObject::NO_DECODE ), STREAM_STD_READ );
296 : }
297 :
298 : // templates should not get precedence over "normal" filters (#i35508, #i33168)
299 0 : const SfxFilter* pTemplateFilter = 0;
300 0 : const SfxFilter* pOldFilter = pFCntnr->GetFilter4FilterName( rPrefFltName );
301 0 : bool bLookForTemplate = pOldFilter && pOldFilter->IsOwnTemplateFormat();
302 0 : if ( pMedium->IsStorage() )
303 : {
304 0 : com::sun::star::uno::Reference < com::sun::star::embed::XStorage > xStor = pMedium->GetStorage();
305 0 : if ( xStor.is() )
306 : {
307 0 : while ( pFilter )
308 : {
309 0 : if( 'C' == pFilter->GetUserData()[0] && IsValidStgFilter( xStor, *pFilter ) )
310 : {
311 0 : if ( pFilter->IsOwnTemplateFormat() && !bLookForTemplate )
312 : // found template filter; maybe there's a "normal" one also
313 0 : pTemplateFilter = pFilter;
314 : else
315 0 : return pFilter;
316 : }
317 :
318 0 : pFilter = aIter.Next();
319 : }
320 :
321 : // there's only a template filter that could be found
322 0 : if ( pTemplateFilter )
323 0 : pFilter = pTemplateFilter;
324 0 : }
325 : }
326 : else
327 : {
328 0 : SvStream* pStream = pMedium->GetInStream();
329 0 : if ( pStream && SotStorage::IsStorageFile(pStream) )
330 0 : xStg = new SotStorage( pStream, false );
331 :
332 0 : if( xStg.Is() && ( xStg->GetError() == SVSTREAM_OK ) )
333 : {
334 0 : while ( pFilter )
335 : {
336 0 : if( 'C' == pFilter->GetUserData()[0] && IsValidStgFilter( *xStg, *pFilter ) )
337 : {
338 0 : if ( pFilter->IsOwnTemplateFormat() && !bLookForTemplate )
339 : // found template filter; maybe there's a "normal" one also
340 0 : pTemplateFilter = pFilter;
341 : else
342 0 : return pFilter;
343 : }
344 :
345 0 : pFilter = aIter.Next();
346 : }
347 :
348 : // there's only a template filter that could be found
349 0 : if ( pTemplateFilter )
350 0 : pFilter = pTemplateFilter;
351 :
352 : }
353 : }
354 :
355 0 : return pFilter;
356 : }
357 :
358 : sal_Char aBuffer[4098];
359 0 : const sal_uLong nMaxRead = sizeof(aBuffer) - 2;
360 0 : sal_uLong nBytesRead = 0;
361 0 : if (pMedium)
362 : {
363 0 : SvStream* pIStrm = pMedium->GetInStream();
364 0 : if( !pIStrm || SVSTREAM_OK != pIStrm->GetError() )
365 0 : return 0;
366 0 : sal_uLong nCurrPos = pIStrm->Tell();
367 0 : nBytesRead = pIStrm->Read(aBuffer, nMaxRead);
368 0 : pIStrm->Seek( nCurrPos );
369 : }
370 :
371 0 : TerminateBuffer(aBuffer, nBytesRead, sizeof(aBuffer));
372 :
373 : {
374 0 : for( sal_uInt16 n = 0; n < MAXFILTER; ++n )
375 : {
376 0 : OUString sNm(aFilterDetect[n].IsReader(aBuffer, nBytesRead));
377 : const SfxFilter* pFilterTmp =
378 0 : sNm.isEmpty() ? 0 : SwIoSystem::GetFilterOfFormat(sNm, pFCntnr);
379 0 : if (pFilterTmp)
380 : {
381 0 : return pFilterTmp;
382 : }
383 0 : }
384 : }
385 :
386 : // no filter recognized so far; thus check "WORD 4 WORD" Filter
387 0 : if( !rFileName.isEmpty() )
388 : {
389 0 : if( pMedium )
390 0 : pMedium->CloseInStream();
391 :
392 : }
393 0 : return SwIoSystem::GetTextFilter( aBuffer, nBytesRead);
394 : }
395 :
396 0 : bool SwIoSystem::IsDetectableText(const sal_Char* pBuf, sal_uLong &rLen,
397 : rtl_TextEncoding *pCharSet, bool *pSwap, LineEnd *pLineEnd, bool bEncodedFilter)
398 : {
399 0 : bool bSwap = false;
400 0 : rtl_TextEncoding eCharSet = RTL_TEXTENCODING_DONTKNOW;
401 0 : bool bLE = true;
402 : /*See if it's a known unicode type*/
403 0 : if (rLen >= 2)
404 : {
405 0 : sal_uLong nHead=0;
406 0 : if (rLen > 2 && sal_uInt8(pBuf[0]) == 0xEF && sal_uInt8(pBuf[1]) == 0xBB &&
407 0 : sal_uInt8(pBuf[2]) == 0xBF)
408 : {
409 0 : eCharSet = RTL_TEXTENCODING_UTF8;
410 0 : nHead = 3;
411 : }
412 0 : else if (sal_uInt8(pBuf[0]) == 0xFE && sal_uInt8(pBuf[1]) == 0xFF)
413 : {
414 0 : eCharSet = RTL_TEXTENCODING_UCS2;
415 0 : bLE = false;
416 0 : nHead = 2;
417 : }
418 0 : else if (sal_uInt8(pBuf[1]) == 0xFE && sal_uInt8(pBuf[0]) == 0xFF)
419 : {
420 0 : eCharSet = RTL_TEXTENCODING_UCS2;
421 0 : nHead = 2;
422 : }
423 0 : pBuf+=nHead;
424 0 : rLen-=nHead;
425 : }
426 :
427 0 : bool bCR = false, bLF = false, bIsBareUnicode = false;
428 :
429 0 : if (eCharSet != RTL_TEXTENCODING_DONTKNOW)
430 : {
431 0 : boost::scoped_array<sal_Unicode> aWork(new sal_Unicode[rLen+1]);
432 0 : sal_Unicode *pNewBuf = aWork.get();
433 : sal_Size nNewLen;
434 0 : if (eCharSet != RTL_TEXTENCODING_UCS2)
435 : {
436 0 : nNewLen = rLen;
437 : rtl_TextToUnicodeConverter hConverter =
438 0 : rtl_createTextToUnicodeConverter(eCharSet);
439 : rtl_TextToUnicodeContext hContext =
440 0 : rtl_createTextToUnicodeContext(hConverter);
441 :
442 : sal_Size nCntBytes;
443 : sal_uInt32 nInfo;
444 : nNewLen = rtl_convertTextToUnicode( hConverter, hContext, pBuf,
445 : rLen, pNewBuf, nNewLen,
446 : (RTL_TEXTTOUNICODE_FLAGS_UNDEFINED_DEFAULT |
447 : RTL_TEXTTOUNICODE_FLAGS_MBUNDEFINED_DEFAULT |
448 0 : RTL_TEXTTOUNICODE_FLAGS_INVALID_DEFAULT), &nInfo, &nCntBytes);
449 :
450 0 : rtl_destroyTextToUnicodeContext(hConverter, hContext);
451 0 : rtl_destroyTextToUnicodeConverter(hConverter);
452 : }
453 : else
454 : {
455 0 : nNewLen = rLen/2;
456 0 : memcpy(pNewBuf, pBuf, rLen);
457 : #ifdef OSL_LITENDIAN
458 0 : bool bNativeLE = true;
459 : #else
460 : bool bNativeLE = false;
461 : #endif
462 0 : if (bLE != bNativeLE)
463 : {
464 0 : bSwap = true;
465 0 : sal_Char* pF = (sal_Char*)pNewBuf;
466 0 : sal_Char* pN = pF+1;
467 0 : for(sal_uLong n = 0; n < nNewLen; ++n, pF+=2, pN+=2 )
468 : {
469 0 : sal_Char c = *pF;
470 0 : *pF = *pN;
471 0 : *pN = c;
472 : }
473 : }
474 : }
475 :
476 0 : for (sal_uLong nCnt = 0; nCnt < nNewLen; ++nCnt, ++pNewBuf)
477 : {
478 0 : switch (*pNewBuf)
479 : {
480 : case 0xA:
481 0 : bLF = true;
482 0 : break;
483 : case 0xD:
484 0 : bCR = true;
485 0 : break;
486 : default:
487 0 : break;
488 : }
489 0 : }
490 : }
491 : else
492 : {
493 0 : for( sal_uLong nCnt = 0; nCnt < rLen; ++nCnt, ++pBuf )
494 : {
495 0 : switch (*pBuf)
496 : {
497 : case 0x0:
498 0 : if( nCnt + 1 < rLen && !*(pBuf+1) )
499 0 : return false;
500 0 : bIsBareUnicode = true;
501 0 : break;
502 : case 0xA:
503 0 : bLF = true;
504 0 : break;
505 : case 0xD:
506 0 : bCR = true;
507 0 : break;
508 : case 0xC:
509 : case 0x1A:
510 : case 0x9:
511 0 : break;
512 : default:
513 0 : break;
514 : }
515 : }
516 : }
517 :
518 0 : LineEnd eSysLE = GetSystemLineEnd();
519 : LineEnd eLineEnd;
520 0 : if (!bCR && !bLF)
521 0 : eLineEnd = eSysLE;
522 : else
523 0 : eLineEnd = bCR ? ( bLF ? LINEEND_CRLF : LINEEND_CR ) : LINEEND_LF;
524 :
525 0 : if (pCharSet)
526 0 : *pCharSet = eCharSet;
527 0 : if (pSwap)
528 0 : *pSwap = bSwap;
529 0 : if (pLineEnd)
530 0 : *pLineEnd = eLineEnd;
531 :
532 0 : return bEncodedFilter || (!bIsBareUnicode && eSysLE == eLineEnd);
533 : }
534 :
535 0 : const SfxFilter* SwIoSystem::GetTextFilter( const sal_Char* pBuf, sal_uLong nLen)
536 : {
537 0 : bool bAuto = IsDetectableText(pBuf, nLen);
538 0 : const sal_Char* pNm = bAuto ? FILTER_TEXT : FILTER_TEXT_DLG;
539 0 : return SwIoSystem::GetFilterOfFormat( OUString::createFromAscii(pNm), 0 );
540 0 : }
541 :
542 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|