Line data Source code
1 : /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 : /*
3 : * This file is part of the LibreOffice project.
4 : *
5 : * This Source Code Form is subject to the terms of the Mozilla Public
6 : * License, v. 2.0. If a copy of the MPL was not distributed with this
7 : * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 : *
9 : * This file incorporates work covered by the following license notice:
10 : *
11 : * Licensed to the Apache Software Foundation (ASF) under one or more
12 : * contributor license agreements. See the NOTICE file distributed
13 : * with this work for additional information regarding copyright
14 : * ownership. The ASF licenses this file to you under the Apache
15 : * License, Version 2.0 (the "License"); you may not use this file
16 : * except in compliance with the License. You may obtain a copy of
17 : * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 : */
19 :
20 : #include <osl/diagnose.h>
21 : #include <sal/macros.h>
22 : #include <rtl/tencinfo.h>
23 : #include <com/sun/star/lang/XMultiServiceFactory.hpp>
24 : #include <com/sun/star/io/XInputStream.hpp>
25 : #include <com/sun/star/xml/sax/XAttributeList.hpp>
26 : #include <com/sun/star/xml/sax/XDocumentHandler.hpp>
27 : #include <com/sun/star/xml/sax/XParser.hpp>
28 : #include <com/sun/star/ucb/XCommandEnvironment.hpp>
29 : #include <com/sun/star/io/XInputStream.hpp>
30 : #include <com/sun/star/uno/Reference.hxx>
31 : #include <comphelper/processfactory.hxx>
32 : #include <xmloff/attrlist.hxx>
33 :
34 : #include <ucbhelper/content.hxx>
35 :
36 : #include <tools/stream.hxx>
37 :
38 : #include "LotusWordProImportFilter.hxx"
39 :
40 : #include <vector>
41 :
42 : #include "lwpfilter.hxx"
43 :
44 : using namespace com::sun::star;
45 : using com::sun::star::uno::Sequence;
46 : using com::sun::star::lang::XComponent;
47 : using com::sun::star::uno::Any;
48 : using com::sun::star::uno::UNO_QUERY;
49 : using com::sun::star::uno::XInterface;
50 : using com::sun::star::uno::Exception;
51 : using com::sun::star::uno::RuntimeException;
52 : using com::sun::star::io::XInputStream;
53 : using com::sun::star::lang::XMultiServiceFactory;
54 : using com::sun::star::beans::PropertyValue;
55 : using com::sun::star::document::XFilter;
56 : using com::sun::star::document::XExtendedFilterDetection;
57 : using com::sun::star::ucb::XCommandEnvironment;
58 :
59 : using com::sun::star::document::XImporter;
60 : using com::sun::star::xml::sax::XAttributeList;
61 : using com::sun::star::xml::sax::XDocumentHandler;
62 : using com::sun::star::xml::sax::XParser;
63 :
64 : // W o r d P r o
65 : static const sal_Int8 header[] = { 0x57, 0x6f, 0x72, 0x64, 0x50, 0x72, 0x6f };
66 :
67 : const sal_Int32 MAXCHARS = 65534;
68 :
69 : // Simple xml importer, currently the importer is very very simple
70 : // it only extracts pure text from the wordpro file. Absolutely no formating
71 : // information is currently imported.
72 : // To reflect the current state of this importer the sax events sent
73 : // to the document handler are also the simplest possible. In addition to
74 : // the basic attributes set up for the 'office:document' element
75 : // all the imported text is inserted into 'text:p' elements.
76 : // The parser extracts the pure text and creates simple a simple 'text:p'
77 : // element to contain that text. In the event of the text exceeding
78 : // MAXCHARS new 'text:p' elements are created as needed
79 : class SimpleXMLImporter
80 : {
81 : private:
82 :
83 : uno::Reference< XDocumentHandler > m_xDocHandler;
84 : std::vector< OUString > m_vStringChunks;
85 : SvStream& m_InputStream;
86 :
87 : bool CheckValidData( sal_Int8 nChar ) const
88 : {
89 : if( ( nChar >= 0x20 && nChar <= 0x7E ) && ( nChar != 0X40 ) )
90 : return true;
91 : return false;
92 : }
93 :
94 : void addAttribute( SvXMLAttributeList* pAttrList, const char* key, const char* val )
95 : {
96 : pAttrList->AddAttribute( OUString::createFromAscii( key ), OUString::createFromAscii( val ) );
97 : }
98 :
99 : void writeTextChunk( const OUString& sChunk )
100 : {
101 : SvXMLAttributeList *pAttrList = new SvXMLAttributeList();
102 : uno::Reference < XAttributeList > xAttrList(pAttrList);
103 :
104 : pAttrList->AddAttribute( "text:style-name", "Standard" );
105 :
106 : m_xDocHandler->startElement( "text:p", xAttrList );
107 : m_xDocHandler->characters( sChunk );
108 : m_xDocHandler->endElement( "text:p" );
109 : }
110 :
111 : void writeDocContentPreamble()
112 : {
113 : SvXMLAttributeList *pDocContentPropList = new SvXMLAttributeList();
114 : uno::Reference < XAttributeList > xDocContentList(pDocContentPropList);
115 : addAttribute( pDocContentPropList, "xmlns:office", "urn:oasis:names:tc:opendocument:xmlns:office:1.0" );
116 : addAttribute( pDocContentPropList, "xmlns:style", "urn:oasis:names:tc:opendocument:xmlns:style:1.0");
117 : addAttribute( pDocContentPropList, "xmlns:text", "urn:oasis:names:tc:opendocument:xmlns:text:1.0" );
118 : addAttribute( pDocContentPropList, "xmlns:table", "urn:oasis:names:tc:opendocument:xmlns:table:1.0" );
119 : addAttribute( pDocContentPropList, "xmlns:draw", "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0" );
120 : addAttribute( pDocContentPropList, "xmlns:fo", "urn:oasis:names:tc:opendocument:xmlns:xsl-fo-compatible:1.0" );
121 : addAttribute( pDocContentPropList, "xmlns:xlink", "http://www.w3.org/1999/xlink" );
122 : addAttribute( pDocContentPropList, "xmlns:dc", "http://purl.org/dc/elements/1.1/" );
123 : addAttribute( pDocContentPropList, "xmlns:meta", "urn:oasis:names:tc:opendocument:xmlns:meta:1.0" );
124 : addAttribute( pDocContentPropList, "xmlns:number", "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0" );
125 : addAttribute( pDocContentPropList, "xmlns:svg", "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0" );
126 : addAttribute( pDocContentPropList, "xmlns:chart", "urn:oasis:names:tc:opendocument:xmlns:chart:1.0" );
127 : addAttribute( pDocContentPropList, "xmlns:dr3d", "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0" );
128 : addAttribute( pDocContentPropList, "xmlns:math", "http://www.w3.org/1998/Math/MathML" );
129 : addAttribute( pDocContentPropList, "xmlns:form", "urn:oasis:names:tc:opendocument:xmlns:form:1.0" );
130 : addAttribute( pDocContentPropList, "xmlns:script", "urn:oasis:names:tc:opendocument:xmlns:script:1.0" );
131 : addAttribute( pDocContentPropList, "xmlns:ooo", "http://openoffice.org/2004/office" );
132 : addAttribute( pDocContentPropList, "xmlns:ooow", "http://openoffice.org/2004/writer" );
133 : addAttribute( pDocContentPropList, "xmlns:oooc", "http://openoffice.org/2004/calc" );
134 : addAttribute( pDocContentPropList, "xmlns:dom", "http://www.w3.org/2001/xml-events" );
135 : addAttribute( pDocContentPropList, "xmlns:xforms", "http://www.w3.org/2002/xforms" );
136 : addAttribute( pDocContentPropList, "xmlns:xsd", "http://www.w3.org/2001/XMLSchema");
137 : addAttribute( pDocContentPropList, "xmlns:xsi", "http://www.w3.org/2001/XMLSchema-instance" );
138 : addAttribute( pDocContentPropList, "office:version", "1.0");
139 : m_xDocHandler->startElement("office:document-content" , xDocContentList );
140 : }
141 :
142 : void parseDoc()
143 : {
144 : sal_uInt8 nDelim, nDummy, nLen, nData;
145 : sal_uInt16 nOpcode;
146 : OUStringBuffer sBuf( MAXCHARS );
147 : sal_Int32 nChars = 0;
148 :
149 : while( !m_InputStream.IsEof())
150 : {
151 : m_InputStream >> nDelim;
152 : if( nDelim == 0x40 )
153 : {
154 : m_InputStream >> nDummy >> nOpcode;
155 : switch( nOpcode )
156 : {
157 : case 0xC00B: // Dictionary Word
158 : m_InputStream >> nLen >> nDummy;
159 : while( nLen > 0 && !m_InputStream.IsEof() )
160 : {
161 : sal_uInt8 nChar;
162 : m_InputStream >> nChar;
163 : if( CheckValidData( nChar ) )
164 : {
165 : sBuf.appendAscii( (sal_Char*)(&nChar),1 );
166 : if ( ++nChars >= MAXCHARS )
167 : {
168 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
169 : nChars = 0;
170 : }
171 : }
172 : nLen--;
173 : }
174 : break;
175 :
176 : case 0x0242: // Non Dictionary word
177 : m_InputStream >> nData;
178 : if( nData == 0x02 )
179 : {
180 : m_InputStream >> nLen >> nDummy;
181 : while( nLen > 0 && !m_InputStream.IsEof() )
182 : {
183 : m_InputStream >> nData;
184 : if( CheckValidData( nData ) )
185 : {
186 : sBuf.appendAscii( (sal_Char*)(&nData),1 );
187 : if ( ++nChars >= MAXCHARS )
188 : {
189 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
190 : nChars = 0;
191 : }
192 : }
193 : nLen--;
194 : }
195 : }
196 : break;
197 : }
198 : }
199 : }
200 : if ( nChars )
201 : m_vStringChunks.push_back( sBuf.makeStringAndClear() );
202 : }
203 :
204 : void writeXML()
205 : {
206 : if ( !m_vStringChunks.empty() )
207 : {
208 : m_xDocHandler->startDocument();
209 : SvXMLAttributeList *pAttrList = new SvXMLAttributeList();
210 : writeDocContentPreamble(); // writes "office:document-content" elem
211 : uno::Reference < XAttributeList > xAttrList(pAttrList);
212 :
213 : m_xDocHandler->startElement( "office:body", xAttrList );
214 :
215 : // process strings imported
216 : std::vector< OUString >::const_iterator it = m_vStringChunks.begin();
217 : std::vector< OUString >::const_iterator it_end = m_vStringChunks.end();
218 : for ( ; it!=it_end; ++it )
219 : writeTextChunk( *it );
220 :
221 : m_xDocHandler->endElement( "office:body" );
222 : m_xDocHandler->endElement( "office:document-content" );
223 : m_xDocHandler->endDocument();
224 : }
225 : }
226 : public:
227 :
228 : SimpleXMLImporter( const uno::Reference< XDocumentHandler >& xDocHandler, SvStream& rStream ) : m_xDocHandler( xDocHandler ), m_InputStream( rStream ) {}
229 :
230 : void import()
231 : {
232 : parseDoc();
233 : writeXML();
234 : }
235 : };
236 :
237 6 : sal_Bool SAL_CALL LotusWordProImportFilter::importImpl( const Sequence< ::com::sun::star::beans::PropertyValue >& aDescriptor )
238 : throw (RuntimeException)
239 : {
240 :
241 6 : sal_Int32 nLength = aDescriptor.getLength();
242 6 : const PropertyValue * pValue = aDescriptor.getConstArray();
243 6 : OUString sURL;
244 12 : for ( sal_Int32 i = 0 ; i < nLength; i++)
245 : {
246 : //Note, we should attempt to use InputStream here first!
247 6 : if ( pValue[i].Name == "URL" )
248 6 : pValue[i].Value >>= sURL;
249 : }
250 :
251 12 : SvFileStream inputStream( sURL, STREAM_READ );
252 6 : if ( inputStream.IsEof() || ( inputStream.GetError() != SVSTREAM_OK ) )
253 0 : return sal_False;
254 :
255 : // An XML import service: what we push sax messages to..
256 12 : OUString sXMLImportService ( "com.sun.star.comp.Writer.XMLImporter" );
257 :
258 12 : uno::Reference< XDocumentHandler > xInternalHandler( mxContext->getServiceManager()->createInstanceWithContext( sXMLImportService, mxContext ), UNO_QUERY );
259 12 : uno::Reference < XImporter > xImporter(xInternalHandler, UNO_QUERY);
260 6 : if (xImporter.is())
261 0 : xImporter->setTargetDocument(mxDoc);
262 :
263 12 : return ( ReadWordproFile( inputStream, xInternalHandler) == 0 );
264 :
265 : }
266 :
267 6 : sal_Bool SAL_CALL LotusWordProImportFilter::filter( const Sequence< ::com::sun::star::beans::PropertyValue >& aDescriptor )
268 : throw (RuntimeException)
269 : {
270 6 : return importImpl ( aDescriptor );
271 : }
272 0 : void SAL_CALL LotusWordProImportFilter::cancel( )
273 : throw (RuntimeException)
274 : {
275 0 : }
276 :
277 : // XImporter
278 0 : void SAL_CALL LotusWordProImportFilter::setTargetDocument( const uno::Reference< ::com::sun::star::lang::XComponent >& xDoc )
279 : throw (::com::sun::star::lang::IllegalArgumentException, RuntimeException)
280 : {
281 0 : meType = FILTER_IMPORT;
282 0 : mxDoc = xDoc;
283 0 : }
284 :
285 : // XExtendedFilterDetection
286 0 : OUString SAL_CALL LotusWordProImportFilter::detect( com::sun::star::uno::Sequence< PropertyValue >& Descriptor )
287 : throw( com::sun::star::uno::RuntimeException )
288 : {
289 :
290 0 : OUString sTypeName( "writer_LotusWordPro_Document" );
291 0 : sal_Int32 nLength = Descriptor.getLength();
292 0 : OUString sURL;
293 0 : const PropertyValue * pValue = Descriptor.getConstArray();
294 0 : uno::Reference < XInputStream > xInputStream;
295 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
296 : {
297 0 : if ( pValue[i].Name == "TypeName" )
298 0 : pValue[i].Value >>= sTypeName;
299 0 : else if ( pValue[i].Name == "InputStream" )
300 0 : pValue[i].Value >>= xInputStream;
301 0 : else if ( pValue[i].Name == "URL" )
302 0 : pValue[i].Value >>= sURL;
303 : }
304 :
305 0 : uno::Reference< com::sun::star::ucb::XCommandEnvironment > xEnv;
306 0 : if (!xInputStream.is())
307 : {
308 : try
309 : {
310 0 : ::ucbhelper::Content aContent(sURL, xEnv, mxContext);
311 0 : xInputStream = aContent.openStream();
312 : }
313 0 : catch ( Exception& )
314 : {
315 0 : return OUString();
316 : }
317 :
318 0 : if (!xInputStream.is())
319 0 : return OUString();
320 : }
321 :
322 0 : Sequence< ::sal_Int8 > aData;
323 0 : sal_Int32 nLen = SAL_N_ELEMENTS( header );
324 0 : if ( !( ( nLen == xInputStream->readBytes( aData, nLen ) )
325 0 : && ( memcmp( ( void* )header, (void*) aData.getConstArray(), nLen ) == 0 ) ) )
326 0 : sTypeName = OUString();
327 :
328 0 : return sTypeName;
329 : }
330 :
331 :
332 : // XInitialization
333 0 : void SAL_CALL LotusWordProImportFilter::initialize( const Sequence< Any >& aArguments )
334 : throw (Exception, RuntimeException)
335 : {
336 0 : Sequence < PropertyValue > aAnySeq;
337 0 : sal_Int32 nLength = aArguments.getLength();
338 0 : if ( nLength && ( aArguments[0] >>= aAnySeq ) )
339 : {
340 0 : const PropertyValue * pValue = aAnySeq.getConstArray();
341 0 : nLength = aAnySeq.getLength();
342 0 : for ( sal_Int32 i = 0 ; i < nLength; i++)
343 : {
344 0 : if ( pValue[i].Name == "Type" )
345 : {
346 0 : pValue[i].Value >>= msFilterName;
347 0 : break;
348 : }
349 : }
350 0 : }
351 0 : }
352 1 : OUString LotusWordProImportFilter_getImplementationName ()
353 : throw (RuntimeException)
354 : {
355 1 : return OUString ( "com.sun.star.comp.Writer.LotusWordProImportFilter" );
356 : }
357 :
358 : #define SERVICE_NAME1 "com.sun.star.document.ImportFilter"
359 : #define SERVICE_NAME2 "com.sun.star.document.ExtendedTypeDetection"
360 0 : sal_Bool SAL_CALL LotusWordProImportFilter_supportsService( const OUString& ServiceName )
361 : throw (RuntimeException)
362 : {
363 0 : return ServiceName == SERVICE_NAME1 || ServiceName == SERVICE_NAME2;
364 : }
365 1 : Sequence< OUString > SAL_CALL LotusWordProImportFilter_getSupportedServiceNames( )
366 : throw (RuntimeException)
367 : {
368 1 : Sequence < OUString > aRet(2);
369 1 : OUString* pArray = aRet.getArray();
370 1 : pArray[0] = OUString ( SERVICE_NAME1 );
371 1 : pArray[1] = OUString ( SERVICE_NAME2 );
372 1 : return aRet;
373 : }
374 : #undef SERVICE_NAME2
375 : #undef SERVICE_NAME1
376 :
377 1 : uno::Reference< XInterface > SAL_CALL LotusWordProImportFilter_createInstance( const uno::Reference< XMultiServiceFactory > & rSMgr)
378 : throw( Exception )
379 : {
380 1 : return (cppu::OWeakObject*) new LotusWordProImportFilter( comphelper::getComponentContext(rSMgr) );
381 : }
382 :
383 : // XServiceInfo
384 0 : OUString SAL_CALL LotusWordProImportFilter::getImplementationName( )
385 : throw (RuntimeException)
386 : {
387 0 : return LotusWordProImportFilter_getImplementationName();
388 : }
389 0 : sal_Bool SAL_CALL LotusWordProImportFilter::supportsService( const OUString& rServiceName )
390 : throw (RuntimeException)
391 : {
392 0 : return LotusWordProImportFilter_supportsService( rServiceName );
393 : }
394 0 : Sequence< OUString > SAL_CALL LotusWordProImportFilter::getSupportedServiceNames( )
395 : throw (RuntimeException)
396 : {
397 0 : return LotusWordProImportFilter_getSupportedServiceNames();
398 : }
399 :
400 : /* vim:set shiftwidth=4 softtabstop=4 expandtab: */
|